javatools.datatypes
Class FrequencyVector<T,V extends java.lang.Number & java.lang.Comparable<V>>

java.lang.Object
  extended by javatools.datatypes.FrequencyVector<T,V>

public class FrequencyVector<T,V extends java.lang.Number & java.lang.Comparable<V>>
extends java.lang.Object

This class is part of the Java Tools (see http://mpii.de/yago-naga/javatools). It is licensed under the Creative Commons Attribution License (see http://creativecommons.org/licenses/by/3.0) by the YAGO-NAGA team (see http://mpii.de/yago-naga). Reference: Fabian M. Suchanek, Milan Vojnovic, Dinan Gunawardena: "Social Tags: Meaning and Suggestions" (pdf, bib), ACM Conference on Information and Knowledge Management (CIKM 2008). The class represents a vector of terms with their frequencies. See the paper cited above for explanations. The class also provides a Wilson interval computation.

Author:
Fabian M. Suchanek

Constructor Summary
FrequencyVector(java.util.Map<T,V> applications)
          Constructs a frequency vector.
 
Method Summary
 double averagePrecision(java.util.Collection<T> groundTruth)
          Computes the average precision (MAP)
 double cosine(FrequencyVector<T,?> other)
          Computes the cosine to another vector
 double cosine(FrequencyVector<T,?> other, java.util.Collection<T> intersection)
          Computes the cosine to another vector, if the intersection is already available
 double doubleValueFor(T term)
          Returns the frequency for a term as double (or 0)
 boolean equals(java.lang.Object obj)
           
 int firstTiePos()
          Returns the first position in sortedTerms that has the same number of applications as its successor
 double fuzzyPrecisionWithRespectTo(FrequencyVector<T,V> trueFrequencies)
          Computes the fuzzy Precision
 double fuzzyRecallWithRespectTo(FrequencyVector<T,V> trueFrequencies)
          Computes the fuzzy recall
 int hashCode()
           
 java.util.Set<T> intersection(FrequencyVector<T,?> other)
          Computes the set of terms common to the supports of this vector and the other vector
static void main(java.lang.String[] args)
          Test
 double max()
          Returns the maximum of elements
 FrequencyVector<T,java.lang.Double> maxNormalized()
          Max-Normalizes this vector
 double maxNormalizedValueFor(T term)
          Returns the frequency for a term, divided by the maximum (or 0)
 double ndcg2WithRespectToGain(FrequencyVector<T,?> trueFrequencies)
          Computes the NDCG with respect to a gain, with weighting 2^x
 double ndcgWithRespectToGain(FrequencyVector<T,?> trueFrequencies)
          Computes the NDCG with respect to a gain
 double norm()
          Returns the L2 norm of elements
 FrequencyVector<T,java.lang.Double> normalized()
          Normalizes this vector
 FrequencyVector<T,java.lang.Double> normalizedMeanWith(FrequencyVector<T,V> other)
          Computes the mean vector of this vector and the other one
 double normalizedValueFor(T term)
          Returns the frequency for a term, divided by the sum (or 0)
 int numTerms()
          Returns the number of terms in the support
 double optimalAveragePrecision(FrequencyVector<T,?> trueFrequencies)
          Computes the average precision, shuffling subsets to get an optimal value
 double precisionAtKWithRespectTo(java.util.Collection<T> groundTruth, int k)
          Computes the standard precision at k
 double precisionAtKWithRespectTo(FrequencyVector<T,?> groundTruth, int k)
          Computes the standard precision at k
 double precisionWithRespectTo(java.util.Collection<T> groundTruth)
          Computes standard precision
 double precisionWithRespectTo(FrequencyVector<T,?> trueFrequencies)
          Computes the standard precision
 double precisionWithRespectToIntersection(java.util.Collection<T> intersection)
          Computes the standard precision if the intersection is known
 double recallAtKWithRespectTo(java.util.Collection<T> groundTruth, int k)
          Computes the standard recall at k
 double recallAtKWithRespectTo(FrequencyVector<T,V> trueFrequencies, int k)
          Computes the standard recall at k
 double recallWithRespectTo(java.util.Collection<T> groundTruth)
          Computes the standard recall
 double recallWithRespectTo(java.util.Collection<T> trueSet, java.util.Collection<T> intersection)
          Computes standard recall, if the intersection is already available
 double recallWithRespectTo(FrequencyVector<T,?> trueFrequencies)
          Computes standard recall
 double recallWithRespectTo(FrequencyVector<T,?> trueFrequencies, java.util.Collection<T> intersection)
          Computes standard recall, if the intersection is already available
 double smoothedValueFor(T term)
          Returns the frequency for a term, divided by the sum and smoothed
 java.util.List<T> sortedTerms()
          Returns the terms sorted by decreasing frequency
 double sum()
          Returns the sum of elements
 T termAtRank(int i)
          Returns the term at rank i
 java.util.Collection<T> terms()
          Returns the terms.
 java.util.Set<T> topKIntersection(FrequencyVector<T,V> trueFrequencies, int k)
          Computes the intersection of the top k elements
 java.lang.String toString()
           
 V valueFor(T term)
          Returns the frequency for a term (or null)
 double weightedPrecisionAtKWithRespectTo(java.util.Collection<T> groundTruth, int k)
          Computes the precision at k to a set, weighted with this vector's frequencies
 double weightedPrecisionAtKWithRespectTo(FrequencyVector<T,?> groundTruth, int k)
          Computes the weighted precision at k
 double weightedPrecisionWithRespectTo(java.util.Collection<T> groundTruth)
          Computes the standard precision with respect to a set, weighted with this vector's frequencies
 double weightedPrecisionWithRespectTo(FrequencyVector<T,?> trueFrequencies)
          Computes the standard precision with respect to another frequency vector, weighted with this vector's frequencies
 double weightedRecallAtKWithRespectTo(FrequencyVector<T,V> trueFrequencies, int k)
          Computes the standard recall at k, weighted with the true frequencies
 double weightedRecallWithRespectTo(FrequencyVector<T,V> trueFrequencies)
          Computes the standard recall, weighted with the true frequencies
static double[] wilson(int total, int correct)
          Computes the Wilson interval (see http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval). Given the total number of events and the number of "correct" events, returns a double array whose first component is the center of the Wilson interval and whose second component is the width of the interval.
 
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
 

Constructor Detail

FrequencyVector

public FrequencyVector(java.util.Map<T,V> applications)
Constructs a frequency vector. The frequency vector is backed by the given map. Note: entries whose value is zero are removed.

Method Detail

firstTiePos

public int firstTiePos()
Returns the first position in sortedTerms that has the same number of applications as its successor


sum

public double sum()
Returns the sum of elements


norm

public double norm()
Returns the L2 norm of elements


max

public double max()
Returns the maximum of elements


termAtRank

public T termAtRank(int i)
Returns the term at rank i


numTerms

public int numTerms()
Returns the number of terms in the support


valueFor

public V valueFor(T term)
Returns the frequency for a term (or null)


doubleValueFor

public double doubleValueFor(T term)
Returns the frequency for a term as double (or 0)


normalizedValueFor

public double normalizedValueFor(T term)
Returns the frequency for a term, divided by the sum (or 0)


maxNormalizedValueFor

public double maxNormalizedValueFor(T term)
Returns the frequency for a term, divided by the maximum (or 0)


smoothedValueFor

public double smoothedValueFor(T term)
Returns the frequency for a term, divided by the sum and smoothed


sortedTerms

public java.util.List<T> sortedTerms()
Returns the terms sorted by decreasing frequency


terms

public java.util.Collection<T> terms()
Returns the terms. This collection may be more efficient for membership checks than the list returned by sortedTerms().


toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object

hashCode

public int hashCode()
Overrides:
hashCode in class java.lang.Object

equals

public boolean equals(java.lang.Object obj)
Overrides:
equals in class java.lang.Object

normalized

public FrequencyVector<T,java.lang.Double> normalized()
Normalizes this vector


maxNormalized

public FrequencyVector<T,java.lang.Double> maxNormalized()
Max-Normalizes this vector


intersection

public java.util.Set<T> intersection(FrequencyVector<T,?> other)
Computes the set of terms common to the supports of this vector and the other vector


topKIntersection

public java.util.Set<T> topKIntersection(FrequencyVector<T,V> trueFrequencies,
                                         int k)
Computes the intersection of the top k elements


cosine

public double cosine(FrequencyVector<T,?> other,
                     java.util.Collection<T> intersection)
Computes the cosine to another vector, if the intersection is already available


cosine

public double cosine(FrequencyVector<T,?> other)
Computes the cosine to another vector


precisionAtKWithRespectTo

public double precisionAtKWithRespectTo(java.util.Collection<T> groundTruth,
                                        int k)
Computes the standard precision at k


weightedPrecisionAtKWithRespectTo

public double weightedPrecisionAtKWithRespectTo(java.util.Collection<T> groundTruth,
                                                int k)
Computes the precision at k to a set, weighted with this vector's frequencies


precisionWithRespectTo

public double precisionWithRespectTo(java.util.Collection<T> groundTruth)
Computes standard precision


precisionWithRespectTo

public double precisionWithRespectTo(FrequencyVector<T,?> trueFrequencies)
Computes the standard precision


precisionWithRespectToIntersection

public double precisionWithRespectToIntersection(java.util.Collection<T> intersection)
Computes the standard precision if the intersection is known


weightedPrecisionWithRespectTo

public double weightedPrecisionWithRespectTo(java.util.Collection<T> groundTruth)
Computes the standard precision with respect to a set, weighted with this vector's frequencies


weightedPrecisionWithRespectTo

public double weightedPrecisionWithRespectTo(FrequencyVector<T,?> trueFrequencies)
Computes the standard precision with respect to another frequency vector, weighted with this vector's frequencies


precisionAtKWithRespectTo

public double precisionAtKWithRespectTo(FrequencyVector<T,?> groundTruth,
                                        int k)
Computes the standard precision at k


weightedPrecisionAtKWithRespectTo

public double weightedPrecisionAtKWithRespectTo(FrequencyVector<T,?> groundTruth,
                                                int k)
Computes the weighted precision at k


averagePrecision

public double averagePrecision(java.util.Collection<T> groundTruth)
Computes the average precision (MAP)


optimalAveragePrecision

public double optimalAveragePrecision(FrequencyVector<T,?> trueFrequencies)
Computes the average precision, shuffling subsets to get an optimal value


recallAtKWithRespectTo

public double recallAtKWithRespectTo(java.util.Collection<T> groundTruth,
                                     int k)
Computes the standard recall at k


recallWithRespectTo

public double recallWithRespectTo(java.util.Collection<T> groundTruth)
Computes the standard recall


weightedRecallWithRespectTo

public double weightedRecallWithRespectTo(FrequencyVector<T,V> trueFrequencies)
Computes the standard recall, weighted with the true frequencies


weightedRecallAtKWithRespectTo

public double weightedRecallAtKWithRespectTo(FrequencyVector<T,V> trueFrequencies,
                                             int k)
Computes the standard recall at k, weighted with the true frequencies


recallWithRespectTo

public double recallWithRespectTo(FrequencyVector<T,?> trueFrequencies)
Computes standard recall


recallWithRespectTo

public double recallWithRespectTo(java.util.Collection<T> trueSet,
                                  java.util.Collection<T> intersection)
Computes standard recall, if the intersection is already available


recallWithRespectTo

public double recallWithRespectTo(FrequencyVector<T,?> trueFrequencies,
                                  java.util.Collection<T> intersection)
Computes standard recall, if the intersection is already available


recallAtKWithRespectTo

public double recallAtKWithRespectTo(FrequencyVector<T,V> trueFrequencies,
                                     int k)
Computes the standard recall at k


ndcgWithRespectToGain

public double ndcgWithRespectToGain(FrequencyVector<T,?> trueFrequencies)
Computes the NDCG with respect to a gain


ndcg2WithRespectToGain

public double ndcg2WithRespectToGain(FrequencyVector<T,?> trueFrequencies)
Computes the NDCG with respect to a gain, with weighting 2^x


normalizedMeanWith

public FrequencyVector<T,java.lang.Double> normalizedMeanWith(FrequencyVector<T,V> other)
Computes the mean vector of this vector and the other one


fuzzyRecallWithRespectTo

public double fuzzyRecallWithRespectTo(FrequencyVector<T,V> trueFrequencies)
Computes the fuzzy recall


fuzzyPrecisionWithRespectTo

public double fuzzyPrecisionWithRespectTo(FrequencyVector<T,V> trueFrequencies)
Computes the fuzzy Precision


wilson

public static double[] wilson(int total,
                              int correct)
Computes the Wilson interval (see http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval). Given the total number of events and the number of "correct" events, returns a double array whose first component is the center of the Wilson interval and whose second component is the width of the interval. The interval is computed for a 95% confidence level.


main

public static void main(java.lang.String[] args)
Test