# Source code for tethne.tests.test_models_lda
import sys
sys.path.append('../tethne')
import unittest
import tempfile
import os
from xml.etree import ElementTree as ET
import networkx as nx
import csv
from tethne.model.corpus.mallet import LDAModel
from tethne.readers.wos import read
from tethne import FeatureSet, tokenize
from tethne.networks import topics
# Corpus file read by the test fixtures below. The path is relative, so it is
# resolved against the current working directory when the tests run.
datapath = './tethne/tests/data/wos3.txt'
import logging
# Set the logger named 'mallet' to DEBUG level for these tests.
# NOTE(review): presumably this is the logger used by tethne.model.corpus.mallet
# -- confirm against that module.
logger = logging.getLogger('mallet')
logger.setLevel('DEBUG')
class TestLDAModel(unittest.TestCase):
    """Tests for ``LDAModel`` fitted on abstracts indexed with ``structured=True``.

    Each test gets a freshly fitted model from :meth:`setUp`; the tests then
    check the model's public accessors and the graphs that
    ``tethne.networks.topics`` builds from it.
    """

    def setUp(self):
        """Read the corpus at ``datapath``, index its abstracts and fit a model.

        The fitted model is stored on ``self.model`` for use by the tests.
        """
        corpus = read(datapath, index_by='wosid')
        corpus.index_feature('abstract', tokenize, structured=True)
        self.model = LDAModel(corpus, featureset_name='abstract')
        # NOTE(review): Z is presumably the number of topics and max_iter the
        # number of sampling iterations -- confirm against LDAModel.fit.
        self.model.fit(Z=20, max_iter=500)

    def test_ldamodel(self):
        """Check the fitted model's time series, matrices and topic listings."""
        dates, rep = self.model.topic_over_time(1)
        # The series for topic 1 must be non-trivial and aligned with its dates.
        self.assertGreater(sum(rep), 0)
        self.assertEqual(len(dates), len(rep))

        self.assertIsInstance(self.model.phi, FeatureSet)
        self.assertIsInstance(self.model.theta, FeatureSet)

        all_topics = self.model.list_topics()
        self.assertIsInstance(all_topics, list)
        self.assertGreater(len(all_topics), 0)

        first_topic = self.model.list_topic(0)
        self.assertIsInstance(first_topic, list)
        self.assertGreater(len(first_topic), 0)

    def test_networks(self):
        """Check that each topic-derived graph exceeds a minimum size and order.

        ``size()`` and ``order()`` are compared against fixed lower bounds for
        the terms, cotopics and topic-coupling graphs in turn.
        """
        termGraph = topics.terms(self.model)
        self.assertGreater(termGraph.size(), 100)
        self.assertGreater(termGraph.order(), 10)

        topicGraph = topics.cotopics(self.model)
        self.assertGreater(topicGraph.size(), 5)
        self.assertGreater(topicGraph.order(), 0)

        paperGraph = topics.topic_coupling(self.model)
        self.assertGreater(paperGraph.size(), 100)
        self.assertGreater(paperGraph.order(), 20)
class TestLDAModelUnstructured(unittest.TestCase):
    """Tests for ``LDAModel`` fitted on abstracts indexed without ``structured``.

    Identical in its checks to the structured variant of this test case; the
    only difference is that ``index_feature`` is called with its default
    value for ``structured``.
    """

    def setUp(self):
        """Read the corpus at ``datapath``, index its abstracts and fit a model.

        The fitted model is stored on ``self.model`` for use by the tests.
        """
        corpus = read(datapath, index_by='wosid')
        # No ``structured`` argument here: this case exercises the default.
        corpus.index_feature('abstract', tokenize)
        self.model = LDAModel(corpus, featureset_name='abstract')
        # NOTE(review): Z is presumably the number of topics and max_iter the
        # number of sampling iterations -- confirm against LDAModel.fit.
        self.model.fit(Z=20, max_iter=500)

    def test_ldamodel(self):
        """Check the fitted model's time series, matrices and topic listings."""
        dates, rep = self.model.topic_over_time(1)
        # The series for topic 1 must be non-trivial and aligned with its dates.
        self.assertGreater(sum(rep), 0)
        self.assertEqual(len(dates), len(rep))

        self.assertIsInstance(self.model.phi, FeatureSet)
        self.assertIsInstance(self.model.theta, FeatureSet)

        all_topics = self.model.list_topics()
        self.assertIsInstance(all_topics, list)
        self.assertGreater(len(all_topics), 0)

        first_topic = self.model.list_topic(0)
        self.assertIsInstance(first_topic, list)
        self.assertGreater(len(first_topic), 0)

    def test_networks(self):
        """Check that each topic-derived graph exceeds a minimum size and order.

        ``size()`` and ``order()`` are compared against fixed lower bounds for
        the terms, cotopics and topic-coupling graphs in turn.
        """
        termGraph = topics.terms(self.model)
        self.assertGreater(termGraph.size(), 100)
        self.assertGreater(termGraph.order(), 10)

        topicGraph = topics.cotopics(self.model)
        self.assertGreater(topicGraph.size(), 5)
        self.assertGreater(topicGraph.order(), 0)

        paperGraph = topics.topic_coupling(self.model)
        self.assertGreater(paperGraph.size(), 100)
        self.assertGreater(paperGraph.order(), 20)
if __name__ == '__main__':
    # Run every TestCase in this module when the file is executed directly.
    unittest.main()