# Source code for tethne.tests.test_analyze_features
import sys
# Put the current working directory on the import path before the tethne
# imports below (presumably so the suite can run from a source checkout
# without installing the package -- TODO confirm).
sys.path.append('./')
import unittest
from tethne.classes.feature import Feature, FeatureSet
from tethne.readers.wos import read
# Star import: no other import in this file provides cosine_similarity,
# angular_similarity or kl_divergence, so the tests presumably get them here.
from tethne.analyze.features import *
# Fixture file handed to read() by the *_citations tests. The path is relative,
# so it resolves against the current working directory.
datapath = './tethne/tests/data/wos3.txt'
[docs]class TestCosineSimilarity(unittest.TestCase):
[docs] def test_cosine_similarity(self):
feature = Feature([('bob', 3), ('joe', 1), ('bobert', 1)])
feature2 = Feature([('blob', 3), ('joe', 1), ('brobert', 1)])
feature3 = Feature([('blob', 1), ('joe', 2), ('brobert', 1)])
c = cosine_similarity(feature3, feature2)
self.assertIsInstance(c, float)
self.assertGreater(c, 0.)
[docs] def test_cosine_similarity_citations(self):
corpus = read(datapath, index_by='wosid')
top = corpus.top_features('citations', 1)[0][0]
P = corpus.features['citations'].papers_containing(top)
F_a = corpus.features['citations'].features[P[0]]
F_b = corpus.features['citations'].features[P[1]]
c = cosine_similarity(F_a, F_b)
self.assertIsInstance(c, float)
self.assertGreater(c, 0.)
[docs]class TestAngularSimilarity(unittest.TestCase):
[docs] def test_angular_similarity(self):
feature = Feature([('bob', 3), ('joe', 1), ('bobert', 1)])
feature2 = Feature([('blob', 3), ('joe', 1), ('brobert', 1)])
feature3 = Feature([('blob', 1), ('joe', 2), ('brobert', 1)])
c = angular_similarity(feature3, feature2)
self.assertIsInstance(c, float)
self.assertGreater(c, 0.)
[docs] def test_angular_similarity_citations(self):
corpus = read(datapath, index_by='wosid')
top = corpus.top_features('citations', 1)[0][0]
P = corpus.features['citations'].papers_containing(top)
F_a = corpus.features['citations'].features[P[0]]
F_b = corpus.features['citations'].features[P[1]]
c = angular_similarity(F_a, F_b)
self.assertIsInstance(c, float)
self.assertGreater(c, 0.)
class TestKLDivergence(unittest.TestCase):
    """Check that ``kl_divergence`` returns a positive float for two vectors."""

    def test_kl_divergence(self):
        """Use vectors produced by a ``FeatureSet`` holding three features."""
        members = (
            ('p1', Feature([('bob', 3), ('joe', 1), ('bobert', 1)])),
            ('p2', Feature([('blob', 3), ('joe', 1), ('brobert', 1)])),
            ('p3', Feature([('blob', 1), ('joe', 2), ('brobert', 1)])),
        )

        # All three features are registered, in the same order as before,
        # even though only 'p2' and 'p3' are turned into vectors.
        featureset = FeatureSet()
        for paper_id, paper_feature in members:
            featureset.add(paper_id, paper_feature)

        vector_a = featureset.as_vector('p2')
        vector_b = featureset.as_vector('p3')
        divergence = kl_divergence(vector_a, vector_b)

        self.assertIsInstance(divergence, float)
        self.assertGreater(divergence, 0.)

    def test_kl_divergence_citations(self):
        """Use citation vectors of two papers read from ``datapath``."""
        corpus = read(datapath, index_by='wosid')

        # First element of the first entry returned by top_features().
        top_citation = corpus.top_features('citations', 1)[0][0]

        citations = corpus.features['citations']
        papers = citations.papers_containing(top_citation)

        vector_a = citations.as_vector(papers[0])
        vector_b = citations.as_vector(papers[1])
        divergence = kl_divergence(vector_a, vector_b)

        self.assertIsInstance(divergence, float)
        self.assertGreater(divergence, 0.)
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()