"""Source code for tethne.tests.test_analyze_features.

Unit tests for the functions imported from ``tethne.analyze.features``.
"""

import sys
sys.path.append('./')

import unittest
from tethne.classes.feature import Feature, FeatureSet
from tethne.readers.wos import read
from tethne.analyze.features import *

# Path (relative to the current working directory) of the data file that the
# ``*_citations`` tests below parse with ``tethne.readers.wos.read``.
datapath = './tethne/tests/data/wos3.txt'


class TestCosineSimilarity(unittest.TestCase):
    """Exercise ``cosine_similarity`` on hand-built and corpus-derived features."""

    def test_cosine_similarity(self):
        """Two features over the same names yield a positive float."""
        # Both features are defined over the names 'blob', 'joe' and
        # 'brobert'; only the weights differ.
        feature_a = Feature([('blob', 1), ('joe', 2), ('brobert', 1)])
        feature_b = Feature([('blob', 3), ('joe', 1), ('brobert', 1)])

        c = cosine_similarity(feature_a, feature_b)

        self.assertIsInstance(c, float)
        self.assertGreater(c, 0.)

    def test_cosine_similarity_citations(self):
        """Two papers sharing a citation yield a positive float."""
        corpus = read(datapath, index_by='wosid')
        citations = corpus.features['citations']

        # Take the first entry returned by ``top_features`` and look up the
        # papers containing it, so the two compared papers share at least
        # that one citation.
        top = corpus.top_features('citations', 1)[0][0]
        papers = citations.papers_containing(top)

        F_a = citations.features[papers[0]]
        F_b = citations.features[papers[1]]
        c = cosine_similarity(F_a, F_b)

        self.assertIsInstance(c, float)
        self.assertGreater(c, 0.)
class TestAngularSimilarity(unittest.TestCase):
    """Exercise ``angular_similarity`` on hand-built and corpus-derived features."""

    def test_angular_similarity(self):
        """Two features over the same names yield a positive float."""
        # Both features are defined over the names 'blob', 'joe' and
        # 'brobert'; only the weights differ.
        feature_a = Feature([('blob', 1), ('joe', 2), ('brobert', 1)])
        feature_b = Feature([('blob', 3), ('joe', 1), ('brobert', 1)])

        c = angular_similarity(feature_a, feature_b)

        self.assertIsInstance(c, float)
        self.assertGreater(c, 0.)

    def test_angular_similarity_citations(self):
        """Two papers sharing a citation yield a positive float."""
        corpus = read(datapath, index_by='wosid')
        citations = corpus.features['citations']

        # Take the first entry returned by ``top_features`` and look up the
        # papers containing it, so the two compared papers share at least
        # that one citation.
        top = corpus.top_features('citations', 1)[0][0]
        papers = citations.papers_containing(top)

        F_a = citations.features[papers[0]]
        F_b = citations.features[papers[1]]
        c = angular_similarity(F_a, F_b)

        self.assertIsInstance(c, float)
        self.assertGreater(c, 0.)
class TestKLDivergence(unittest.TestCase):
    """Exercise ``kl_divergence`` on vectors from a FeatureSet and from a corpus."""

    def test_kl_divergence(self):
        """Vectors for two differently weighted papers yield a positive float."""
        feature = Feature([('bob', 3), ('joe', 1), ('bobert', 1)])
        feature2 = Feature([('blob', 3), ('joe', 1), ('brobert', 1)])
        feature3 = Feature([('blob', 1), ('joe', 2), ('brobert', 1)])

        # All three features are registered, although only the vectors for
        # 'p2' and 'p3' are compared below.
        featureset = FeatureSet()
        featureset.add('p1', feature)
        featureset.add('p2', feature2)
        featureset.add('p3', feature3)

        V_a = featureset.as_vector('p2')
        V_b = featureset.as_vector('p3')
        k = kl_divergence(V_a, V_b)

        self.assertIsInstance(k, float)
        self.assertGreater(k, 0.)

    def test_kl_divergence_citations(self):
        """Citation vectors of two papers sharing a citation yield a positive float."""
        corpus = read(datapath, index_by='wosid')
        citations = corpus.features['citations']

        # Take the first entry returned by ``top_features`` and look up the
        # papers containing it, so the two compared papers share at least
        # that one citation.
        top = corpus.top_features('citations', 1)[0][0]
        papers = citations.papers_containing(top)

        V_a = citations.as_vector(papers[0])
        V_b = citations.as_vector(papers[1])
        k = kl_divergence(V_a, V_b)

        self.assertIsInstance(k, float)
        self.assertGreater(k, 0.)
if __name__ == '__main__':
    # Run every TestCase defined in this module when executed as a script.
    unittest.main()