/*
 * Decompiled with CFR 0.152.
 */
package edu.iu.nwb.analysis.isidupremover;

import edu.iu.nwb.analysis.isidupremover.TablePair;
import edu.iu.nwb.analysis.isidupremover.tuplecomparison.ISIPubComparer;
import edu.iu.nwb.analysis.isidupremover.tuplecomparison.MainPubComparer;
import edu.iu.nwb.shared.isiutil.ISITag;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Dictionary;
import java.util.HashSet;
import java.util.Set;
import org.cishell.framework.CIShellContext;
import org.cishell.framework.algorithm.AlgorithmExecutionException;
import org.cishell.framework.data.Data;
import org.osgi.service.log.LogService;
import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.util.collections.IntIterator;

public class ISIDupRemover {
    Data[] data;
    Dictionary parameters;
    CIShellContext context;
    private static final String LOG_FILE_NAME = "isiduplicateremoverlog";
    private LogService log;
    private ISIPubComparer mainPubComparer = new MainPubComparer();

    public TablePair removeDuplicatePublications(Table origTable, LogService log, boolean printRunningLogToConsole) throws AlgorithmExecutionException {
        if (!this.tableSanityCheckPasses(origTable)) {
            this.log.log(2, "Unable to remove duplicates from table. Returning original table.");
            return new TablePair(origTable, origTable);
        }
        this.log = log;
        StringBuffer runningLog = new StringBuffer();
        Integer savedPubIndex = null;
        String savedPubID = null;
        int recordsWithoutUIDs = 0;
        IntIterator publicationsByIDIter = origTable.rowsSortedBy(ISITag.UNIQUE_ID.columnName, true);
        ArrayList<Integer> publicationsToRemove = new ArrayList<Integer>();
        while (publicationsByIDIter.hasNext()) {
            Integer currentPubIndex = (Integer)publicationsByIDIter.next();
            String currentPubID = origTable.getString(currentPubIndex.intValue(), ISITag.UNIQUE_ID.columnName);
            if (currentPubID == null) {
                ++recordsWithoutUIDs;
                continue;
            }
            if (!currentPubID.equals(savedPubID)) {
                savedPubIndex = currentPubIndex;
                savedPubID = currentPubID;
                continue;
            }
            Integer pubToRemoveIndex = this.determineWhichToRemove(origTable, currentPubIndex, savedPubIndex, runningLog);
            if (pubToRemoveIndex.equals(currentPubIndex)) {
                publicationsToRemove.add(currentPubIndex);
                continue;
            }
            publicationsToRemove.add(savedPubIndex);
            savedPubIndex = currentPubIndex;
            savedPubID = currentPubID;
        }
        if (printRunningLogToConsole) {
            log.log(3, runningLog.toString());
        }
        File logFile = null;
        try {
            logFile = File.createTempFile(LOG_FILE_NAME, ".txt");
            BufferedWriter writer = new BufferedWriter(new FileWriter(logFile));
            writer.write(runningLog.toString());
            writer.close();
        }
        catch (IOException e) {
            throw new AlgorithmExecutionException("Unable to write removed duplicates log.", (Throwable)e);
        }
        Table noDupTable = new Table();
        noDupTable.addColumns(origTable.getSchema());
        Table dupTable = new Table();
        dupTable.addColumns(origTable.getSchema());
        for (Integer pubIndex : origTable) {
            if (!publicationsToRemove.contains(pubIndex)) {
                noDupTable.addTuple(origTable.getTuple(pubIndex.intValue()));
                continue;
            }
            dupTable.addTuple(origTable.getTuple(pubIndex.intValue()));
        }
        log.log(3, "The original " + origTable.getRowCount() + " records have been processed to remove duplicate unique ISI IDs leaving " + noDupTable.getRowCount() + " records.");
        if (recordsWithoutUIDs > 0) {
            log.log(2, recordsWithoutUIDs + " records did not have unique IDs (specified with the UT tag in ISI format)," + " so we were unable to determine whether there were duplicates of these records. " + "The absence of a unique ID is most likely a flaw in the original data.");
        }
        if (logFile != null) {
            log.log(3, "");
            log.log(3, "Wrote log to " + logFile.getAbsolutePath());
        }
        return new TablePair(noDupTable, dupTable);
    }

    private Integer determineWhichToRemove(Table table, Integer currentPubIndex, Integer savedPubIndex, StringBuffer runningLog) {
        Integer pubToRemoveIndex;
        Tuple currentPubTuple = table.getTuple(currentPubIndex.intValue());
        Tuple savedPubTuple = table.getTuple(savedPubIndex.intValue());
        String commonID = currentPubTuple.getString(ISITag.UNIQUE_ID.columnName);
        String currentPubTitle = currentPubTuple.getString(ISITag.TITLE.columnName);
        String savedPubTitle = savedPubTuple.getString(ISITag.TITLE.columnName);
        runningLog.append("Found a pair of publication records with ID '" + commonID + "'\r\n");
        if (currentPubTitle == null && savedPubTitle == null) {
            runningLog.append("Neither have a title specified (Very unusual).");
        } else {
            if (currentPubTitle == null) {
                runningLog.append("The first does not have a title.");
                runningLog.append("Removing first.");
                return currentPubIndex;
            }
            if (savedPubTitle == null) {
                runningLog.append("The second does not have a title.");
                runningLog.append("Removing second.");
                return savedPubIndex;
            }
        }
        if (currentPubTitle.equals(savedPubTitle)) {
            String commonTitle = currentPubTitle;
            runningLog.append("Both titled '" + commonTitle + "'\r\n");
        } else {
            runningLog.append("The first titled '" + currentPubTitle + "'\r\n");
            runningLog.append("The second titled '" + savedPubTitle + "'\r\n");
        }
        int compareResult = this.mainPubComparer.compare(currentPubTuple, savedPubTuple, runningLog);
        if (compareResult > 0) {
            runningLog.append("Removing second\r\n");
            pubToRemoveIndex = savedPubIndex;
        } else if (compareResult < 0) {
            runningLog.append("Removing first\r\n");
            pubToRemoveIndex = currentPubIndex;
        } else {
            runningLog.append("Arbitrarily removing first\r\n");
            pubToRemoveIndex = currentPubIndex;
        }
        runningLog.append("\r\n");
        runningLog.append("--------------------\r\n");
        runningLog.append("\r\n");
        return pubToRemoveIndex;
    }

    private boolean tableSanityCheckPasses(Table isiTable) {
        boolean hasAUniqueIDColumn = isiTable.canGetString(ISITag.UNIQUE_ID.columnName);
        if (!hasAUniqueIDColumn) {
            this.log.log(2, "ISI Table does not have a unique ID column (abbreviated UT).It is possible that no records (a.k.a papers) in the original ISI file specified a unique ID.Therefore, we are unable to determine which papers are duplicates.");
            return false;
        }
        return true;
    }
}

