/*
 * Decompiled with CFR 0.152.
 */
package edu.iu.nwb.preprocessing.duplicatenodedetector;

import edu.iu.nwb.preprocessing.duplicatenodedetector.util.GraphSearchAlgorithms;
import edu.iu.nwb.preprocessing.duplicatenodedetector.util.ListMap;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import org.cishell.framework.algorithm.Algorithm;
import org.cishell.framework.algorithm.AlgorithmExecutionException;
import org.cishell.framework.data.BasicData;
import org.cishell.framework.data.Data;
import org.cishell.utilities.TableUtilities;
import prefuse.data.Graph;
import prefuse.data.Node;
import prefuse.data.Schema;
import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.util.collections.IntIterator;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.Jaro;

public class DuplicateNodeDetectorAlgorithm
implements Algorithm {
    /** Data format string attached to the two text log files produced by this algorithm. */
    private static final String TEXT_TYPE = "file:text/plain";
    /** Name of the float column declared on the merge graph's edge table. */
    public static final String SIMILARITY_COLUMN_NAME = "similarity";
    /**
     * Name of the int column added to the output table. It is initialised to the
     * 1-based row number and, for merged nodes, overwritten with the primary node's value.
     */
    public static final String UNIQUE_INDEX_COLUMN_NAME = "uniqueIndex";
    /** Name of the String column added to the output table; defaults to "*". */
    public static final String COMBINE_VALUES_COLUMN_NAME = "combineValues";
    /**
     * Value written to the combineValues column of nodes that are merged into another node.
     * Also returned as the prefix key when the requested prefix length is below one.
     */
    public static final String NOT_THE_PRIMARY_NODE = "";
    /** String metric used to score pairs of attribute values (Jaro). */
    private final AbstractStringMetric similarityChecker = new Jaro();
    /** First element of the input data array; expected to wrap a Graph (see execute). */
    private Data inputData;
    /** Name of the node attribute whose values are compared. */
    private String compareAttributeName;
    /** Pairs with similarity at or above this threshold are linked in the merge graph. */
    private float mergeOnSimilarity;
    /** Pairs below the merge threshold but at or above this one are written to the note log. */
    private float makeNoteOnSimilarity;
    /** Number of leading characters used to group nodes before pairwise comparison. */
    private int numPrefixLetters;

    public DuplicateNodeDetectorAlgorithm(Data[] data, Dictionary parameters) {
        this.inputData = data[0];
        this.compareAttributeName = (String)parameters.get("compareAttribute");
        this.mergeOnSimilarity = ((Float)parameters.get("mergeOnSimilarity")).floatValue();
        this.makeNoteOnSimilarity = ((Float)parameters.get("makeNoteOnSimilarity")).floatValue();
        this.numPrefixLetters = (Integer)parameters.get("numPrefixLetters");
    }

    /**
     * Runs duplicate-node detection on the input graph.
     *
     * <p>Builds an annotated copy of the node table, links sufficiently similar nodes
     * in a merge graph, writes the merge decisions back into a table, and packages the
     * table together with the two text logs.
     *
     * @return the merge table, the merge log and the note log, in that order
     * @throws AlgorithmExecutionException if a log file cannot be written
     */
    public Data[] execute() throws AlgorithmExecutionException {
        StringBuffer noteLog = new StringBuffer();
        StringBuffer mergeLog = new StringBuffer();

        Graph inputGraph = (Graph) this.inputData.getData();
        Table nodeTable = this.constructAlteredNodeTable(inputGraph);
        Graph mergeGraph = this.makeMergeGraph(nodeTable, noteLog);
        Table mergeTable = this.createTableWithMergeInfo(nodeTable, mergeGraph, mergeLog);

        return this.formatAsData(mergeTable, noteLog, mergeLog);
    }

    /**
     * Builds a new table holding the graph's node data plus the merge bookkeeping columns.
     *
     * @param graph graph whose nodes are copied
     * @return the populated table
     */
    private Table constructAlteredNodeTable(Graph graph) {
        Table emptyTable = new Table();
        Schema nodeSchema = graph.getNodeTable().getSchema();
        Table tableWithColumns = this.createTableSchema(nodeSchema, emptyTable);
        return this.populateTable(tableWithColumns, graph);
    }

    /**
     * Builds the merge graph: an undirected graph over the node table's rows in which
     * an edge joins two nodes whose compared attribute values are similar enough to merge.
     *
     * <p>Nodes are only compared with other nodes in the same attribute-prefix group.
     * Each unordered pair of distinct nodes is compared exactly once; a node is never
     * compared with itself, so no self-loop edges are added and a node can never be
     * reported as noteworthy against itself.
     *
     * <p>Pairs that fall below the merge threshold but reach the note threshold are
     * appended to {@code noteLog}, most similar first.
     *
     * @param nodeTable table whose rows are the candidate nodes
     * @param noteLog   buffer receiving the report of noteworthy, unmerged pairs
     * @return the merge graph
     */
    private Graph makeMergeGraph(Table nodeTable, StringBuffer noteLog) {
        Graph mergeGraph = this.makeEmptyMergeGraph(nodeTable);
        ListMap groupedNodes = this.sortNodesByAttributePrefix(nodeTable, this.compareAttributeName, this.numPrefixLetters);
        List<SimilarityReport> noteworthySimilarityReports = new ArrayList<SimilarityReport>();
        for (Object groupObject : groupedNodes.values()) {
            List nodeGroup = (List) groupObject;
            int groupSize = nodeGroup.size();
            for (int ii = 0; ii < groupSize; ++ii) {
                Integer firstNodeIndex = (Integer) nodeGroup.get(ii);
                // Start at ii + 1: comparing a node with itself is meaningless.
                for (int jj = ii + 1; jj < groupSize; ++jj) {
                    Integer secondNodeIndex = (Integer) nodeGroup.get(jj);
                    float similarity = this.compareNodesCaseInsensitiveBy(this.compareAttributeName, firstNodeIndex, secondNodeIndex, nodeTable);
                    if (similarity >= this.mergeOnSimilarity) {
                        mergeGraph.addEdge(firstNodeIndex.intValue(), secondNodeIndex.intValue());
                    } else if (similarity >= this.makeNoteOnSimilarity) {
                        String leftName = nodeTable.getString(firstNodeIndex.intValue(), this.compareAttributeName);
                        String rightName = nodeTable.getString(secondNodeIndex.intValue(), this.compareAttributeName);
                        noteworthySimilarityReports.add(new SimilarityReport(leftName, rightName, similarity));
                    }
                }
            }
        }
        // Ascending sort followed by a reverse yields most-similar-first order.
        Collections.sort(noteworthySimilarityReports);
        Collections.reverse(noteworthySimilarityReports);
        for (SimilarityReport report : noteworthySimilarityReports) {
            noteLog.append(report.similarity + " similar:" + "\r\n");
            noteLog.append("  \"" + report.leftName + "\"" + "\r\n");
            noteLog.append("  \"" + report.rightName + "\"" + "\r\n");
        }
        return mergeGraph;
    }

    /**
     * Copies the node table and records, for every cluster of two or more connected
     * nodes in the merge graph, which node the others will be merged into.
     *
     * @param oldNodeTable table to copy; it is not modified by this method
     * @param mergeGraph   graph whose connected components define the merges
     * @param mergeLog     buffer receiving the human-readable merge report
     * @return the annotated copy of the node table
     */
    private Table createTableWithMergeInfo(Table oldNodeTable, Graph mergeGraph, StringBuffer mergeLog) {
        Table annotatedTable = TableUtilities.copyTable(oldNodeTable);
        List components = this.extractWeakComponentClusters(mergeGraph);

        mergeLog.append("Merge report\n");
        mergeLog.append("Similarly named entities will be merged into the one with the longest name.\n\n");

        int reportNumber = 1;
        for (Iterator componentIt = components.iterator(); componentIt.hasNext(); ) {
            Collection component = (Collection) componentIt.next();
            // Singleton components have nothing to merge.
            if (component.size() > 1) {
                mergeLog.append(this.setMergeInfoForCluster(annotatedTable, component, reportNumber));
                reportNumber++;
            }
        }

        mergeLog.append("End of merge report.\n");
        return annotatedTable;
    }

    /**
     * Marks every node of a cluster except the primary one as merged into the primary,
     * and produces the matching section of the merge report.
     *
     * <p>The primary node is the one with the longest compared attribute value; its
     * unique index is taken to be its row number plus one.
     *
     * @param newNodeTable     table to annotate
     * @param cluster          row indices (Integer) of the nodes in the cluster
     * @param mergeReportIndex number shown in this section's heading
     * @return the report section for this cluster
     */
    private StringBuffer setMergeInfoForCluster(Table newNodeTable, Collection cluster, int mergeReportIndex) {
        StringBuffer section = new StringBuffer();
        section.append("======== Merge ").append(mergeReportIndex).append(" ========").append("\n");

        Integer primaryNode = this.selectNodeWithLongestAttributeValue(newNodeTable, cluster, this.compareAttributeName);
        String primaryNodeName = newNodeTable.getString(primaryNode.intValue(), this.compareAttributeName);
        int primaryUniqueIndex = primaryNode.intValue() + 1;
        section.append(String.valueOf(primaryNodeName)).append(" will have the following merged in:").append("\n");

        Iterator members = cluster.iterator();
        while (members.hasNext()) {
            Integer member = (Integer) members.next();
            if (!primaryNode.equals(member)) {
                String mergedName = this.setMergeInfoForNode(newNodeTable, member, primaryUniqueIndex);
                section.append(String.valueOf(mergedName)).append("\n");
            }
        }

        section.append("\n");
        return section;
    }

    /**
     * Flags one node as merged into another: stores the primary node's unique index on
     * it and blanks its combineValues cell.
     *
     * @param newNodeTable table to annotate
     * @param node         row index of the node being merged away
     * @param uniqueIndex  unique index of the primary node
     * @return the merged node's compared attribute value
     */
    private String setMergeInfoForNode(Table newNodeTable, Integer node, int uniqueIndex) {
        int row = node.intValue();
        newNodeTable.setInt(row, UNIQUE_INDEX_COLUMN_NAME, uniqueIndex);
        newNodeTable.setString(row, COMBINE_VALUES_COLUMN_NAME, NOT_THE_PRIMARY_NODE);
        return newNodeTable.getString(row, this.compareAttributeName);
    }

    /**
     * Picks the node whose attribute value is the longest string.
     *
     * <p>On ties the node encountered first in iteration order wins. A node whose
     * attribute value is {@code null} is treated as shorter than any actual string
     * (including the empty string) instead of causing a NullPointerException.
     *
     * @param table        table holding the nodes
     * @param nodeIndices  row indices (Integer) of the candidate nodes; must not be empty
     * @param attributeKey name of the column to measure
     * @return the winning row index, or {@code null} if the column cannot be read as a string
     * @throws IllegalArgumentException if {@code nodeIndices} is empty
     */
    private Integer selectNodeWithLongestAttributeValue(Table table, Collection nodeIndices, String attributeKey) {
        if (nodeIndices.isEmpty()) {
            throw new IllegalArgumentException("Must give at least one node.");
        }
        // The column's readability does not depend on the row, so check it once.
        if (!table.canGetString(attributeKey)) {
            return null;
        }
        int longestLength = Integer.MIN_VALUE;
        Integer winningNodeIndex = null;
        for (Object candidate : nodeIndices) {
            Integer nodeIndex = (Integer) candidate;
            String attributeValue = table.getString(nodeIndex.intValue(), attributeKey);
            // A missing value ranks below every real string, even "".
            int length = (attributeValue == null) ? -1 : attributeValue.length();
            if (length > longestLength) {
                longestLength = length;
                winningNodeIndex = nodeIndex;
            }
        }
        return winningNodeIndex;
    }

    /**
     * Wraps the merge table and the two logs as output data, each with the input data
     * as its parent.
     *
     * <p>The note log file is written before the merge log file.
     *
     * @param nodeTable annotated merge table
     * @param noteLog   report of noteworthy pairs that will not be merged
     * @param mergeLog  report of the nodes that will be merged
     * @return the table, the merge log and the note log, in that order
     * @throws AlgorithmExecutionException if a log file cannot be written
     */
    private Data[] formatAsData(Table nodeTable, StringBuffer noteLog, StringBuffer mergeLog) throws AlgorithmExecutionException {
        BasicData tableData = new BasicData(nodeTable, Table.class.getName());
        Dictionary tableMetadata = tableData.getMetadata();
        tableMetadata.put("Modified", Boolean.TRUE);
        tableMetadata.put("Parent", this.inputData);
        tableMetadata.put("Type", "Matrix");
        tableMetadata.put("Label", "Merge Table: based on " + this.compareAttributeName);

        File noteLogFile = this.stringToFile(noteLog.toString(), "nodeLog");
        BasicData noteLogData = this.wrapTextLog(noteLogFile, "Text Log: Noteworthy nodes that will NOT be merged");

        File mergeLogFile = this.stringToFile(mergeLog.toString(), "mergeLog");
        BasicData mergeLogData = this.wrapTextLog(mergeLogFile, "Text Log: Nodes that will be merged");

        return new Data[]{tableData, mergeLogData, noteLogData};
    }

    /** Wraps a text log file as plain-text data labelled {@code label} and parented to the input data. */
    private BasicData wrapTextLog(File logFile, String label) {
        BasicData logData = new BasicData(logFile, TEXT_TYPE);
        Dictionary logMetadata = logData.getMetadata();
        logMetadata.put("Parent", this.inputData);
        logMetadata.put("Type", "Text");
        logMetadata.put("Label", label);
        return logData;
    }

    /**
     * Scores how similar two nodes' attribute values are, ignoring case.
     *
     * <p>Both values are lower-cased with {@link Locale#ROOT} so that the score does
     * not depend on the default locale of the machine running the algorithm.
     *
     * @param attributeColumn name of the column to compare
     * @param nodeOneIndex    row index of the first node
     * @param nodeTwoIndex    row index of the second node
     * @param nodeTable       table holding both nodes
     * @return the similarity reported by the configured string metric
     */
    private float compareNodesCaseInsensitiveBy(String attributeColumn, Integer nodeOneIndex, Integer nodeTwoIndex, Table nodeTable) {
        String nodeOneAttribute = nodeTable.getString(nodeOneIndex.intValue(), attributeColumn);
        String nodeTwoAttribute = nodeTable.getString(nodeTwoIndex.intValue(), attributeColumn);
        return this.similarityChecker.getSimilarity(
                nodeOneAttribute.toLowerCase(Locale.ROOT),
                nodeTwoAttribute.toLowerCase(Locale.ROOT));
    }

    /**
     * Creates an undirected graph over the given node table with no edges yet.
     *
     * <p>The edge table has int source and target columns and a float similarity column.
     *
     * @param nodeTable table backing the graph's nodes
     * @return the edgeless merge graph
     */
    private Graph makeEmptyMergeGraph(Table nodeTable) {
        Table mergeEdges = new Table();
        mergeEdges.addColumn(Graph.DEFAULT_SOURCE_KEY, int.class);
        mergeEdges.addColumn(Graph.DEFAULT_TARGET_KEY, int.class);
        mergeEdges.addColumn(SIMILARITY_COLUMN_NAME, float.class);
        return new Graph(nodeTable, mergeEdges, false);
    }

    /**
     * Adds to {@code t} one column per column of {@code graphSchema}, in the same
     * order, followed by the uniqueIndex (int) and combineValues (String, default "*")
     * columns.
     *
     * @param graphSchema schema to mirror
     * @param t           table receiving the columns
     * @return {@code t}
     */
    private Table createTableSchema(Schema graphSchema, Table t) {
        for (int column = 0; column < graphSchema.getColumnCount(); column++) {
            String columnName = graphSchema.getColumnName(column);
            t.addColumn(columnName, graphSchema.getColumnType(column));
        }
        t.addColumn(UNIQUE_INDEX_COLUMN_NAME, int.class);
        t.addColumn(COMBINE_VALUES_COLUMN_NAME, String.class, "*");
        return t;
    }

    /**
     * Appends one row per graph node to {@code t}, copying the node's values by column
     * position and setting uniqueIndex to the table's row count after the row is added.
     *
     * @param t table to fill; its leading columns must line up with the node columns
     * @param g graph supplying the nodes
     * @return {@code t}
     */
    private Table populateTable(Table t, Graph g) {
        for (Iterator nodes = g.nodes(); nodes.hasNext(); ) {
            Node node = (Node) nodes.next();
            t.addRow();
            int rowCount = t.getRowCount();
            int lastRow = rowCount - 1;
            for (int column = 0; column < node.getColumnCount(); column++) {
                t.set(lastRow, column, node.get(column));
            }
            t.set(lastRow, UNIQUE_INDEX_COLUMN_NAME, Integer.valueOf(rowCount));
        }
        return t;
    }

    /**
     * Partitions the graph's nodes into clusters by running an undirected depth-first
     * search from every node that no earlier search has already reached.
     *
     * @param graph graph to partition
     * @return list of clusters, each the set returned by the search for its start node
     */
    public List extractWeakComponentClusters(Graph graph) {
        List<LinkedHashSet> clusters = new ArrayList<LinkedHashSet>();
        HashSet visited = new HashSet();
        for (Iterator nodes = graph.nodes(); nodes.hasNext(); ) {
            Node node = (Node) nodes.next();
            Integer row = Integer.valueOf(node.getRow());
            if (!visited.contains(row)) {
                LinkedHashSet cluster = GraphSearchAlgorithms.undirectedDepthFirstSearch(graph, row);
                visited.addAll(cluster);
                clusters.add(cluster);
            }
        }
        return clusters;
    }

    /**
     * Groups row indices by the leading characters of their attribute value.
     *
     * <p>The value is lower-cased (with {@link Locale#ROOT}) before the prefix is
     * taken. Node comparison is case-insensitive, so values differing only in the case
     * of their first letters must land in the same group; otherwise they would never
     * be compared with each other.
     *
     * <p>Rows whose attribute value is {@code null} are left out.
     *
     * @param nodeTable            table whose rows are grouped
     * @param compareAttributeName name of the column supplying the values
     * @param numPrefixLetters     number of leading characters forming the group key
     * @return map from prefix key to the row indices (Integer) sharing it
     */
    private ListMap sortNodesByAttributePrefix(Table nodeTable, String compareAttributeName, int numPrefixLetters) {
        ListMap nodesByAttributePrefix = new ListMap();
        IntIterator nodeIndexIt = nodeTable.rows();
        while (nodeIndexIt.hasNext()) {
            int nodeIndex = nodeIndexIt.nextInt();
            Tuple row = nodeTable.getTuple(nodeIndex);
            String comparisonAttributeContents = row.getString(compareAttributeName);
            if (comparisonAttributeContents == null) {
                continue;
            }
            // Normalise case so grouping agrees with the case-insensitive comparison.
            String normalizedContents = comparisonAttributeContents.toLowerCase(Locale.ROOT);
            String prefixKey = this.extractPrefixKey(normalizedContents, numPrefixLetters);
            nodesByAttributePrefix.put(prefixKey, Integer.valueOf(nodeIndex));
        }
        return nodesByAttributePrefix;
    }

    /**
     * Returns the first {@code prefixLength} characters of {@code s}.
     *
     * <p>If {@code s} is shorter than the requested length the whole string is
     * returned; if the requested length is below one the empty string is returned.
     *
     * @param s            string to take the prefix of
     * @param prefixLength requested number of leading characters
     * @return the prefix key
     */
    private String extractPrefixKey(String s, int prefixLength) {
        int available = s.length();
        if (prefixLength < 1) {
            return NOT_THE_PRIMARY_NODE;
        }
        return (prefixLength > available) ? s : s.substring(0, prefixLength);
    }

    /**
     * Writes a string to a newly created temporary text file.
     *
     * <p>The file gets a proper {@code .txt} extension, and the writer is closed even
     * when writing fails so the file handle is not leaked.
     *
     * @param s        content to write
     * @param fileName prefix for the temporary file's name
     * @return the file that was written
     * @throws AlgorithmExecutionException if the file cannot be created or written
     */
    private File stringToFile(String s, String fileName) throws AlgorithmExecutionException {
        try {
            // The suffix is used verbatim, so the dot must be included.
            File outFile = File.createTempFile(fileName, ".txt");
            FileWriter out = new FileWriter(outFile);
            try {
                out.write(s);
            } finally {
                out.close();
            }
            return outFile;
        }
        catch (IOException e) {
            throw new AlgorithmExecutionException("Could not create file from string", (Throwable)e);
        }
    }

    /**
     * A pair of names together with their similarity score. Reports are ordered by
     * ascending similarity.
     */
    private static class SimilarityReport
    implements Comparable {
        protected String leftName;
        protected String rightName;
        protected double similarity;

        /**
         * @param nameLeft   first name of the pair
         * @param nameRight  second name of the pair
         * @param similarity similarity score of the two names
         */
        public SimilarityReport(String nameLeft, String nameRight, double similarity) {
            this.similarity = similarity;
            this.rightName = nameRight;
            this.leftName = nameLeft;
        }

        /**
         * Compares by similarity only.
         *
         * @return -1, 0 or 1 when this report's similarity is lower than, neither lower
         *         nor higher than, or higher than the other's
         * @throws ClassCastException if {@code other} is not a SimilarityReport
         */
        public int compareTo(Object other) {
            if (!(other instanceof SimilarityReport)) {
                throw new ClassCastException("A SimilarityReport can only be compared to other SimilarityReports.");
            }
            double theirs = ((SimilarityReport) other).similarity;
            if (this.similarity < theirs) {
                return -1;
            }
            return (this.similarity > theirs) ? 1 : 0;
        }
    }
}

