/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractTokenizedStringDistance;
import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.PrintfFormat;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.StringWrapperIterator;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

/**
 * Abstract token-based string distance whose score is built from per-token
 * weights assigned by a subclass-supplied smoothing function.
 *
 * <p>Usage as visible in this class: {@link #train} accumulates background
 * token counts, {@link #prepare} turns a string into a {@link BagOfTokens}
 * whose weights are the values returned by {@link #smoothedProbability}, and
 * {@link #score} combines the weights of the tokens two prepared bags share.
 */
public abstract class JensenShannonDistance
extends AbstractTokenizedStringDistance {
    /** Number of times each token has been seen by {@link #train}. */
    private final Map<Token, Integer> backgroundFrequency = new HashMap<Token, Integer>();
    /** Total number of token occurrences counted by {@link #train}. */
    int totalTokens = 0;

    /**
     * Creates a distance that uses the given tokenizer.
     *
     * @param tokenizer tokenizer handed to the superclass constructor
     */
    public JensenShannonDistance(Tokenizer tokenizer) {
        super(tokenizer);
    }

    /** Creates a distance using the superclass's no-argument constructor. */
    public JensenShannonDistance() {
    }

    /**
     * Accumulates background token statistics from a stream of strings.
     *
     * <p>Every token yielded by each string's bag increments both that token's
     * background count and {@link #totalTokens}. Calling this method again adds
     * to the existing counts.
     *
     * @param i iterator over the training strings
     */
    public final void train(StringWrapperIterator i) {
        while (i.hasNext()) {
            // Advance the iterator exactly once per hasNext() check, so that no
            // training string is skipped and an exhausted iterator is never read.
            BagOfTokens bag = this.asBagOfTokens(i.nextStringWrapper());
            Iterator j = bag.tokenIterator();
            while (j.hasNext()) {
                Token tok = (Token)j.next();
                ++this.totalTokens;
                Integer freq = this.backgroundFrequency.get(tok);
                int updated = freq == null ? 1 : freq.intValue() + 1;
                this.backgroundFrequency.put(tok, Integer.valueOf(updated));
            }
        }
    }

    /**
     * Tokenizes a string and replaces each token's weight with its smoothed
     * probability.
     *
     * @param s the string to prepare
     * @return a {@link BagOfTokens} for {@code s} whose token weights are the
     *         values returned by {@link #smoothedProbability}
     */
    public final StringWrapper prepare(String s) {
        BagOfTokens bag = new BagOfTokens(s, this.tokenizer.tokenize(s));
        // Read the total once, before any weight is overwritten below.
        double totalWeight = bag.getTotalWeight();
        Iterator i = bag.tokenIterator();
        while (i.hasNext()) {
            Token tok = (Token)i.next();
            double freq = bag.getWeight(tok);
            bag.setWeight(tok, this.smoothedProbability(tok, freq, totalWeight));
        }
        return bag;
    }

    /**
     * Computes the weight stored for a token by {@link #prepare}.
     *
     * @param tok         the token being weighted
     * @param freq        the token's weight in the bag before smoothing
     * @param totalWeight the bag's total weight before smoothing
     * @return the smoothed weight to store for {@code tok}
     */
    protected abstract double smoothedProbability(Token tok, double freq, double totalWeight);

    /**
     * Returns the fraction of all trained token occurrences that were
     * {@code tok}.
     *
     * <p>Before any training {@link #totalTokens} is zero, so the floating-point
     * division {@code 0.0 / 0.0} yields {@code NaN}.
     *
     * @param tok the token to look up
     * @return the token's background count (0 if never seen) divided by
     *         {@link #totalTokens}
     */
    protected double backgroundProb(Token tok) {
        Integer freqInteger = this.backgroundFrequency.get(tok);
        double freq = freqInteger == null ? 0.0 : (double)freqInteger.intValue();
        return freq / (double)this.totalTokens;
    }

    /**
     * Scores two prepared strings from the weights of the tokens they share.
     *
     * <p>For every token of {@code s} that {@code t} also contains, with
     * weights {@code ps} and {@code pt}, the term
     * {@code h(ps + pt) - h(ps) - h(pt)} is subtracted from a running sum,
     * where {@code h(p) = -p * ln(p)}. The result is half that sum divided by
     * {@code ln 2}.
     *
     * @param s first string; cast to {@link BagOfTokens}
     * @param t second string; cast to {@link BagOfTokens}
     * @return {@code 0.5 * sum / ln(2)}
     */
    public final double score(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        double sum = 0.0;
        Iterator i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = (Token)i.next();
            // Only tokens present in both bags contribute.
            if (!tBag.contains(tok)) continue;
            double ps = sBag.getWeight(tok);
            double pt = tBag.getWeight(tok);
            sum -= this.h(ps + pt) - this.h(ps) - this.h(pt);
        }
        return 0.5 * sum / Math.log(2.0);
    }

    /**
     * Entropy-style term {@code -p * ln(p)}.
     *
     * @param p the weight; not checked here, so {@code p == 0} evaluates
     *          {@code -0.0 * -Infinity}, which is {@code NaN}
     * @return {@code -p * Math.log(p)}
     */
    private double h(double p) {
        return -p * Math.log(p);
    }

    /**
     * Builds a human-readable breakdown of {@link #score} for two prepared
     * strings.
     *
     * <p>Lists each shared token with both weights and its
     * {@code h(ps + pt) - h(ps) - h(pt)} term, each formatted with
     * {@code "%.3f"}, followed by a line holding the final score.
     *
     * @param s first string; cast to {@link BagOfTokens}
     * @param t second string; cast to {@link BagOfTokens}
     * @return the explanation text
     */
    public final String explainScore(StringWrapper s, StringWrapper t) {
        StringBuilder buf = new StringBuilder();
        PrintfFormat fmt = new PrintfFormat("%.3f");
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        buf.append("Common tokens: ");
        Iterator i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = (Token)i.next();
            if (!tBag.contains(tok)) continue;
            double ps = sBag.getWeight(tok);
            double pt = tBag.getWeight(tok);
            buf.append(" " + tok.getValue() + ": ");
            buf.append(fmt.sprintf(ps));
            buf.append("*");
            buf.append(fmt.sprintf(pt));
            buf.append(":delta=");
            buf.append(fmt.sprintf(this.h(ps + pt) - this.h(ps) - this.h(pt)));
        }
        buf.append("\nscore = " + this.score(s, t));
        return buf.toString();
    }
}

