/*
 * Decompiled with CFR 0.152.
 */
package edu.iu.nwb.shared.isiutil;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.SetMultimap;
import edu.iu.nwb.shared.isiutil.ISITag;
import edu.iu.nwb.shared.isiutil.exception.CitationExtractionPreparationException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.cishell.utilities.StringUtilities;
import org.osgi.service.log.LogService;
import prefuse.data.DataTypeException;
import prefuse.data.Table;
import prefuse.data.Tuple;
import prefuse.util.collections.IntIterator;

/**
 * Prepares an ISI table for citation extraction.
 *
 * <p>Preparation has two steps:
 * <ol>
 *   <li>optionally, rewrite each row's abbreviated journal name (J9) to the most
 *       similar journal name found in the table's cited references, and</li>
 *   <li>add a {@value #SELF_REFERENCE_COLUMN_NAME} column holding, for every row, a
 *       citation-style string built from the row's own author, year, journal,
 *       volume, first page and DOI.</li>
 * </ol>
 *
 * <p>The table passed in is modified in place and also returned.
 */
public class ISICitationExtractionPreparer {
    /** Name of the column added to the table to hold each row's self reference. */
    public static final String SELF_REFERENCE_COLUMN_NAME = "Cite Me As";

    private static final String ISI_AUTHOR_SEPARATOR = "|";
    private static final String ISI_CITATION_SEPARATOR = "|";
    private static final String ISI_FIELD_SEPARATOR = ", ";

    /* Weights used by the word/name similarity heuristic. */
    private static final float SAME_WORD_SCORE = 1.0f;
    private static final float DIFFERENT_WORD_SCORE = -5.0f;
    private static final float MISSING_LETTER_ABBREVIATION_SCORE = -3.0f;
    private static final float MAYBE_USED_DIFFERENT_VOWEL_SCORE = 0.5f;
    private static final float EXTRA_LETTER_PENALTY = -0.3f;
    /** Score of a purely numeric word that has no counterpart in the other name. */
    private static final float UNMATCHED_NUMBER_SCORE = -0.5f;
    /** Offset the per-letter ratio is subtracted from when scoring an abbreviation. */
    private static final float ABBREVIATION_SCORE_PIVOT = 0.6f;
    /** Flat penalty applied on top of the scaled penalty for different words. */
    private static final float DIFFERENT_WORD_BASE_PENALTY = 1.5f;
    /** A cited journal name must score strictly above this to replace a journal name. */
    private static final float NO_CHANGE_THRESHOLD = 0.1f;
    /** Name scores strictly inside (-band, band) get their calculation trace logged. */
    private static final float TRACE_SCORE_BAND = 0.5f;

    private static final String VOWEL = "[aeiouyAEIOUY]";
    private static final String ALL_NUMBERS = "^[0-9]+$";

    /* Precompiled once; the similarity code evaluates these inside nested loops. */
    private static final Pattern VOWEL_PATTERN = Pattern.compile(VOWEL);
    private static final Pattern ALL_NUMBERS_PATTERN = Pattern.compile(ALL_NUMBERS);
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");
    private static final Pattern AUTHOR_SEPARATOR_PATTERN =
            Pattern.compile(Pattern.quote(ISI_AUTHOR_SEPARATOR));
    private static final Pattern CITATION_SEPARATOR_PATTERN =
            Pattern.compile(Pattern.quote(ISI_CITATION_SEPARATOR));
    private static final Pattern NAME_TOKEN_SEPARATOR_PATTERN = Pattern.compile("[,\\s]+");
    private static final Pattern STARTS_WITH_A_LETTER_PATTERN = Pattern.compile("^[a-zA-Z].*$");

    private final LogService log;

    /**
     * @param log service that receives warnings about incomplete rows and debug
     *            traces of borderline similarity scores; when {@code null},
     *            messages are dropped
     */
    public ISICitationExtractionPreparer(LogService log) {
        this.log = log;
    }

    /**
     * Prepares {@code isiTable} for citation extraction.
     *
     * @param isiTable table to prepare; it is modified in place
     * @param shouldCleanReferences whether journal names should first be replaced by
     *                              their best-matching cited journal names
     * @return the same table, with the {@value #SELF_REFERENCE_COLUMN_NAME} column added
     * @throws CitationExtractionPreparationException if a row value cannot be read as a String
     */
    public Table prepareForCitationExtraction(Table isiTable, boolean shouldCleanReferences)
            throws CitationExtractionPreparationException {
        Table prepared = isiTable;
        if (shouldCleanReferences) {
            prepared = cleanReferences(prepared);
        }
        return addSelfReferences(prepared);
    }

    /** Adds the self-reference column and fills it in for every row. */
    private Table addSelfReferences(Table isiTable) throws CitationExtractionPreparationException {
        isiTable.addColumn(SELF_REFERENCE_COLUMN_NAME, String.class);
        IntIterator rows = isiTable.rows();
        while (rows.hasNext()) {
            int rowIndex = rows.nextInt();
            String selfReference = createSelfReference(isiTable.getTuple(rowIndex));
            isiTable.setString(rowIndex, SELF_REFERENCE_COLUMN_NAME, selfReference);
        }
        return isiTable;
    }

    /**
     * Builds the self reference of one row: the available author, year, journal,
     * volume, first page and DOI tokens, in that order, joined with ", ".
     *
     * @throws CitationExtractionPreparationException if a value cannot be read as a String
     */
    private String createSelfReference(Tuple isiRow) throws CitationExtractionPreparationException {
        List<String> tokens = new ArrayList<String>();
        try {
            String author = extractFirstAndLastNameOfFirstAuthor(isiRow);
            if (author != null) {
                tokens.add(author);
            } else {
                handleNoAuthor(isiRow);
            }

            String year = extractPublicationYear(isiRow);
            if (year != null) {
                tokens.add(year);
            } else {
                handleNoYear(isiRow);
            }

            String journal = extractAbbreviatedJournalName(isiRow);
            if (journal != null) {
                tokens.add(journal);
            } else {
                handleNoJournal(isiRow);
            }

            String volume = extractVolume(isiRow);
            if (volume != null) {
                tokens.add(volume);
            } else {
                handleNoVolume();
            }

            String page = extractFirstPage(isiRow);
            if (page != null) {
                tokens.add(page);
            } else {
                handleNoPage();
            }

            String doi = extractDOI(isiRow);
            if (doi != null) {
                tokens.add(doi);
            } else {
                handleNoDOI();
            }
        } catch (IndexOutOfBoundsException ignored) {
            // Deliberate best effort: the tokens collected so far are still returned and the
            // remaining fields are skipped.
            // NOTE(review): presumably raised by the table when a requested column does not
            // exist -- confirm. The original caught only ArrayIndexOutOfBoundsException; its
            // superclass is caught so that a plain IndexOutOfBoundsException is treated alike.
        } catch (DataTypeException e) {
            throw new CitationExtractionPreparationException(
                    "Some elements in the tuple '" + isiRow + "' cannot be converted to a String (apparently)", e);
        }
        String[] tokenArray = tokens.toArray(new String[tokens.size()]);
        return StringUtilities.implodeStringArray(tokenArray, ISI_FIELD_SEPARATOR);
    }

    /**
     * Overwrites the abbreviated journal name of every row whose current name is a key
     * of {@code journalNameToCitedJournalName} with the mapped cited journal name.
     */
    private static Table replaceJournalNamesWithCitedJournalNames(
            Table isiTable, Map<String, String> journalNameToCitedJournalName) {
        String journalColumn = ISITag.TWENTY_NINE_CHAR_JOURNAL_ABBREVIATION.columnName;
        IntIterator rows = isiTable.rows();
        while (rows.hasNext()) {
            Tuple row = isiTable.getTuple(rows.nextInt());
            String journalName = row.getString(journalColumn);
            if (journalName == null) {
                continue;
            }
            String citedJournalName = journalNameToCitedJournalName.get(journalName);
            if (citedJournalName != null) {
                row.setString(journalColumn, citedJournalName);
            }
        }
        return isiTable;
    }

    /**
     * Collects the journal names found in the cited references of every row, grouped
     * by the first character of the (trimmed) name.
     */
    private SetMultimap<String, String> extractCitedJournalNames(Table isiTable) {
        SetMultimap<String, String> citedJournalNames = HashMultimap.create();
        IntIterator rows = isiTable.rows();
        while (rows.hasNext()) {
            Tuple row = isiTable.getTuple(rows.nextInt());
            String citedReferences = row.getString(ISITag.CITED_REFERENCES.columnName);
            if (citedReferences == null) {
                continue;
            }
            for (String citedReference : CITATION_SEPARATOR_PATTERN.split(citedReferences)) {
                String citedJournalName = extractCitedJournalName(citedReference);
                if (citedJournalName == null) {
                    continue;
                }
                String firstLetter = extractFirstLetter(citedJournalName);
                if (firstLetter != null) {
                    citedJournalNames.put(firstLetter, citedJournalName);
                }
            }
        }
        return citedJournalNames;
    }

    /**
     * Collects the abbreviated journal name of every row, grouped by the first
     * character of the (trimmed) name.
     */
    private SetMultimap<String, String> extractJournalNames(Table isiTable) {
        SetMultimap<String, String> journalNames = HashMultimap.create();
        IntIterator rows = isiTable.rows();
        while (rows.hasNext()) {
            Tuple row = isiTable.getTuple(rows.nextInt());
            String journalName = row.getString(ISITag.TWENTY_NINE_CHAR_JOURNAL_ABBREVIATION.columnName);
            if (journalName == null) {
                continue;
            }
            String firstLetter = extractFirstLetter(journalName);
            if (firstLetter != null) {
                journalNames.put(firstLetter, journalName);
            }
        }
        return journalNames;
    }

    /**
     * Maps each journal name to its best-matching cited journal name. Only names that
     * share a first-letter key are compared with each other.
     *
     * @param journalNamesByFirstLetter journal names of the rows themselves
     * @param citedJournalNamesByFirstLetter journal names taken from cited references
     * @return journal name to cited journal name; names without a good enough match are absent
     */
    private Map<String, String> linkJournalNamesToCitedJournalNames(
            SetMultimap<String, String> journalNamesByFirstLetter,
            SetMultimap<String, String> citedJournalNamesByFirstLetter) {
        Map<String, String> journalNameToCitedJournalName = new HashMap<String, String>();
        for (String firstLetter : journalNamesByFirstLetter.keySet()) {
            linkJournalNamesOfSameFirstLetter(
                    journalNameToCitedJournalName,
                    journalNamesByFirstLetter.get(firstLetter),
                    citedJournalNamesByFirstLetter.get(firstLetter));
        }
        return journalNameToCitedJournalName;
    }

    /**
     * For every journal name, records in {@code links} the cited journal name with the
     * highest similarity, provided that similarity is strictly above the no-change threshold.
     *
     * @return {@code links}, for convenience
     */
    private Map<String, String> linkJournalNamesOfSameFirstLetter(
            Map<String, String> links, Set<String> journalNames, Set<String> citedJournalNames) {
        if (journalNames == null || citedJournalNames == null) {
            return links;
        }
        for (String journalName : journalNames) {
            String bestCitedJournalName = null;
            float bestSimilarity = NO_CHANGE_THRESHOLD;
            for (String citedJournalName : citedJournalNames) {
                float similarity = calculateNameSimilarity(journalName, citedJournalName);
                if (similarity > bestSimilarity) {
                    bestCitedJournalName = citedJournalName;
                    bestSimilarity = similarity;
                }
            }
            if (bestCitedJournalName != null) {
                links.put(journalName, bestCitedJournalName);
            }
        }
        return links;
    }

    /**
     * Replaces the abbreviated journal name of each row with the most similar journal
     * name found among the table's cited references.
     */
    private Table cleanReferences(Table isiTable) {
        SetMultimap<String, String> citedJournalNamesByFirstLetter = extractCitedJournalNames(isiTable);
        SetMultimap<String, String> journalNamesByFirstLetter = extractJournalNames(isiTable);
        // Argument order matters: the resulting map must be keyed by the rows' own journal
        // names, because that is what replaceJournalNamesWithCitedJournalNames looks up.
        Map<String, String> journalNameToCitedJournalName =
                linkJournalNamesToCitedJournalNames(journalNamesByFirstLetter, citedJournalNamesByFirstLetter);
        return replaceJournalNamesWithCitedJournalNames(isiTable, journalNameToCitedJournalName);
    }

    /**
     * Scores how similar two journal names are by comparing them word by word, position
     * by position, and averaging over the word count of the longer name. Words of the
     * longer name that have no counterpart are scored against {@code null}.
     *
     * <p>Scores strictly between -0.5 and 0.5 are logged at debug level together with
     * the calculation trace.
     *
     * @param jn journal name
     * @param cjn cited journal name
     * @return the average word score; higher means more similar
     */
    private float calculateNameSimilarity(String jn, String cjn) {
        StringBuilder trace = new StringBuilder();
        trace.append("Calculating '").append(jn).append("' and '").append(cjn).append("'.\r\n");

        String[] jnWords = trimAfterEmpties(WHITESPACE_PATTERN.split(jn));
        String[] cjnWords = trimAfterEmpties(WHITESPACE_PATTERN.split(cjn));
        String[] longerName = getLongest(jnWords, cjnWords);
        if (longerName.length == 0) {
            // Neither name yielded a word (e.g. a name starting with whitespace). Avoid the
            // 0/0 division; like the NaN it produced, this score never beats the threshold.
            return DIFFERENT_WORD_SCORE;
        }

        int sharedWordCount = Math.min(jnWords.length, cjnWords.length);
        float totalScore = 0.0f;
        for (int i = 0; i < sharedWordCount; i++) {
            totalScore += calculateWordSimilarity(jnWords[i], cjnWords[i], trace);
        }
        for (int j = sharedWordCount; j < longerName.length; j++) {
            totalScore += calculateWordSimilarity(longerName[j], null, trace);
        }

        float finalScore = totalScore / (float) longerName.length;
        if (finalScore > -TRACE_SCORE_BAND && finalScore < TRACE_SCORE_BAND) {
            logMessage(LogService.LOG_DEBUG, jn + " == " + cjn + ": " + finalScore);
            logMessage(LogService.LOG_DEBUG, trace.toString());
        }
        return finalScore;
    }

    /**
     * Scores the similarity of two words and appends a description of the calculation
     * to {@code trace}.
     *
     * <p>Letters are compared position by position. At the first mismatch that is not a
     * plausible vowel substitution, the rest of the shorter word is searched for, in
     * order, inside the rest of the longer word, which lets abbreviations with dropped
     * letters still count as the same word.
     *
     * @param word1 first word, or {@code null} if the first name has no word at this position
     * @param word2 second word, or {@code null} if the second name has no word at this position
     * @param trace receives the human-readable calculation log
     * @return a positive score for matching words, a negative one for differing words
     */
    private static float calculateWordSimilarity(String word1, String word2, StringBuilder trace) {
        trace.append("  comparing '").append(word1).append("' with '").append(word2).append("'\r\n");
        if (word1 == null && word2 == null) {
            trace.append("    both null. returning 0\r\n");
            return 0.0f;
        }
        if (word1 == null) {
            return scoreUnmatchedWord(word2, "word1", trace);
        }
        if (word2 == null) {
            return scoreUnmatchedWord(word1, "word2", trace);
        }

        int minLength = Math.min(word1.length(), word2.length());
        int maxLength = Math.max(word1.length(), word2.length());
        String shortWord = getShortWord(word1, word2);
        String longWord = getLongWord(word1, word2);

        float scoreModifier = 0.0f;
        boolean sameWord = true;
        // Counts skipped letters of the long word that are NOT vowels.
        // NOTE(review): the trace text says "Skipped a vowel!" although the test below is for
        // non-vowels; the original logic is preserved -- confirm which of the two is intended.
        int nonVowelsSkipped = 0;
        boolean missingLetterAbbreviation = false;
        boolean maybeUsedWrongVowel = false;

        for (int i = 0; i < minLength; i++) {
            char letter1 = word1.charAt(i);
            char letter2 = word2.charAt(i);
            if (letter1 == letter2) {
                scoreModifier += 1.0f;
                continue;
            }

            // NOTE(review): position 1 is excluded from the vowel-substitution rule exactly
            // as in the original; the reason is not evident from the code.
            boolean plausibleVowelSubstitution =
                    isVowel(letter1) && isVowel(letter2) && minLength == maxLength && i != 1;
            if (plausibleVowelSubstitution) {
                trace.append("Maybe used wrong vowel");
                maybeUsedWrongVowel = true;
                continue;
            }

            maybeUsedWrongVowel = false;
            trace.append("      non-matching letters ").append(letter1).append(",").append(letter2);
            if (i == 0) {
                sameWord = false;
            }

            // Look for each remaining letter of the short word, in order, in what is left of
            // the long word.
            int longWordPlace = i;
            for (int j = i; j < minLength; j++) {
                String restOfLongWord = longWord.substring(longWordPlace);
                char wanted = shortWord.charAt(j);
                int index = restOfLongWord.indexOf(wanted);
                trace.append("      does '").append(restOfLongWord).append("' contain ").append(wanted).append("'?");
                if (index == -1) {
                    trace.append("      No");
                    sameWord = false;
                    continue;
                }
                if (index > 0) {
                    missingLetterAbbreviation = true;
                    for (int k = longWordPlace; k < longWordPlace + index; k++) {
                        if (!isVowel(longWord.charAt(k))) {
                            if (++nonVowelsSkipped > 1) {
                                sameWord = false;
                            }
                            trace.append("Skipped a vowel!");
                        }
                    }
                }
                trace.append("      Yes");
                // The further ahead the letter was found, the less it contributes.
                scoreModifier += 1.0f / (float) (index + 1);
                longWordPlace += index + 1;
            }
            break;
        }

        float finalScoreModifier = scoreModifier / (float) maxLength;
        float finalScore;
        if (!sameWord) {
            finalScore = DIFFERENT_WORD_SCORE * (1.0f - finalScoreModifier) - DIFFERENT_WORD_BASE_PENALTY;
        } else if (missingLetterAbbreviation) {
            trace.append("Missing letter abbreviation score!: (final score modifier) ")
                    .append(finalScoreModifier).append("\r\n");
            finalScore = MISSING_LETTER_ABBREVIATION_SCORE * (ABBREVIATION_SCORE_PIVOT - finalScoreModifier);
        } else if (maybeUsedWrongVowel) {
            finalScore = MAYBE_USED_DIFFERENT_VOWEL_SCORE * finalScoreModifier;
        } else {
            finalScore = SAME_WORD_SCORE * finalScoreModifier;
        }
        trace.append("   returning: ").append(finalScore).append("\r\n");
        return finalScore;
    }

    /**
     * Scores a word that has no counterpart in the other name: a small fixed penalty for
     * a purely numeric word, otherwise the different-word score plus a per-letter penalty.
     *
     * @param word the word that is present
     * @param missingLabel label of the absent word ("word1" or "word2"), used in the trace
     */
    private static float scoreUnmatchedWord(String word, String missingLabel, StringBuilder trace) {
        if (ALL_NUMBERS_PATTERN.matcher(word).matches()) {
            return UNMATCHED_NUMBER_SCORE;
        }
        float score = (float) word.length() * EXTRA_LETTER_PENALTY + DIFFERENT_WORD_SCORE;
        // The trace reports the value actually returned.
        trace.append("    ").append(missingLabel).append(" is null. returning ").append(score).append("\r\n");
        return score;
    }

    /** Returns whether {@code letter} is one of a, e, i, o, u, y in either case. */
    private static boolean isVowel(char letter) {
        return VOWEL_PATTERN.matcher(String.valueOf(letter)).matches();
    }

    /** Returns the array with more elements; {@code sa1} on a tie. */
    private static String[] getLongest(String[] sa1, String[] sa2) {
        return sa1.length >= sa2.length ? sa1 : sa2;
    }

    /** Returns the longer word; {@code w1} on a tie. */
    private static String getLongWord(String w1, String w2) {
        return w1.length() >= w2.length() ? w1 : w2;
    }

    /** Returns the shorter word; {@code w2} on a tie. */
    private static String getShortWord(String w1, String w2) {
        return w1.length() >= w2.length() ? w2 : w1;
    }

    /**
     * Returns the third ", "-separated section of a cited reference, which this class
     * treats as the cited journal name.
     *
     * @return the section, or {@code null} if the reference is {@code null} or has fewer
     *         than three sections
     */
    private String extractCitedJournalName(String citedReference) {
        if (citedReference == null) {
            printNullCitedReferenceWarning();
            return null;
        }
        String[] sections = citedReference.split(ISI_FIELD_SEPARATOR);
        if (sections.length < 3) {
            printNullCitedReferenceWarning();
            return null;
        }
        return sections[2];
    }

    /**
     * Returns the first character of the trimmed name as a String, or {@code null} if
     * the name is empty after trimming.
     */
    private String extractFirstLetter(String citedJournalName) {
        String trimmedName = citedJournalName.trim();
        if (trimmedName.length() == 0) {
            printZeroLengthCitedJournalNameWarning();
            return null;
        }
        return trimmedName.substring(0, 1);
    }

    /**
     * Returns the first "|"-separated author of the row, with every run of commas and
     * whitespace inside the name replaced by a single space.
     *
     * @return the normalized name, or {@code null} if the row has no authors value
     */
    private static String extractFirstAndLastNameOfFirstAuthor(Tuple isiRow) {
        String authors = isiRow.getString(ISITag.AUTHORS.columnName);
        if (authors == null) {
            return null;
        }
        String[] eachAuthor = AUTHOR_SEPARATOR_PATTERN.split(authors);
        if (eachAuthor.length == 0) {
            return handleNoAuthors();
        }
        String[] nameTokens = NAME_TOKEN_SEPARATOR_PATTERN.split(eachAuthor[0].trim());
        return StringUtilities.implodeStringArray(nameTokens, " ");
    }

    /** Returns the row's publication year value, or {@code null} if it has none. */
    private static String extractPublicationYear(Tuple isiRow) {
        return isiRow.getString(ISITag.PUBLICATION_YEAR.columnName);
    }

    /** Returns the row's abbreviated journal name (J9), or {@code null} if it has none. */
    private static String extractAbbreviatedJournalName(Tuple isiRow) {
        return isiRow.getString(ISITag.TWENTY_NINE_CHAR_JOURNAL_ABBREVIATION.columnName);
    }

    /** Returns the row's volume prefixed with "V", or {@code null} if it has none. */
    private static String extractVolume(Tuple isiRow) {
        String volume = isiRow.getString(ISITag.VOLUME.columnName);
        return volume == null ? null : "V" + volume;
    }

    /**
     * Returns the row's beginning page, prefixed with "P" unless it already starts with
     * an ASCII letter, or {@code null} if the row has none.
     */
    private static String extractFirstPage(Tuple isiRow) {
        String firstPage = isiRow.getString(ISITag.BEGINNING_PAGE.columnName);
        if (firstPage == null) {
            return null;
        }
        if (STARTS_WITH_A_LETTER_PATTERN.matcher(firstPage).matches()) {
            return firstPage;
        }
        return "P" + firstPage;
    }

    /** Returns the row's DOI prefixed with "DOI ", or {@code null} if it has none. */
    private static String extractDOI(Tuple isiRow) {
        String doi = isiRow.getString(ISITag.DOI.columnName);
        return doi == null ? null : "DOI " + doi;
    }

    /** Result used when the authors value contains no author at all. */
    private static String handleNoAuthors() {
        return null;
    }

    /**
     * Returns the elements of {@code s} that precede its first empty string; everything
     * from the first empty string on is dropped.
     */
    private static String[] trimAfterEmpties(String[] s) {
        List<String> kept = new ArrayList<String>();
        for (String element : s) {
            if (element.equals("")) {
                break;
            }
            kept.add(element);
        }
        return kept.toArray(new String[kept.size()]);
    }

    /** Forwards a message to the log service, if one was supplied. */
    private void logMessage(int level, String message) {
        if (this.log != null) {
            this.log.log(level, message);
        }
    }

    private void handleNoAuthor(Tuple isiRow) {
        logMessage(LogService.LOG_WARNING, "The row " + isiRow + " has no author column. The '" + SELF_REFERENCE_COLUMN_NAME + "' field  will be invalid");
    }

    private void handleNoYear(Tuple isiRow) {
        logMessage(LogService.LOG_WARNING, "The row " + isiRow + " has no year column. The '" + SELF_REFERENCE_COLUMN_NAME + "' field may be invalid");
    }

    private void handleNoJournal(Tuple isiRow) {
        logMessage(LogService.LOG_WARNING, "The row " + isiRow + " has no jounal column (J9). The '" + SELF_REFERENCE_COLUMN_NAME + "' field may be invalid");
    }

    /** A missing volume is not reported. */
    private void handleNoVolume() {
    }

    /** A missing first page is not reported. */
    private void handleNoPage() {
    }

    /** A missing DOI is not reported. */
    private void handleNoDOI() {
    }

    /** A missing or too-short cited reference is not reported. */
    private void printNullCitedReferenceWarning() {
    }

    /** An empty cited journal name is not reported. */
    private void printZeroLengthCitedJournalNameWarning() {
    }
}

