SciPy
Need help? Have a feature request? Please check out the tethne-users group .

Source code for tethne.tests.test_models_lda

import sys
sys.path.append('../tethne')

import unittest
import tempfile
import os
from xml.etree import ElementTree as ET
import networkx as nx
import csv


from tethne.readers.wos import read
from tethne import FeatureSet, tokenize
from tethne.networks import topics

datapath = './tethne/tests/data/wos3.txt'

import logging
logger = logging.getLogger('mallet')
logger.setLevel('DEBUG')


[docs]class TestHelpers(unittest.TestCase):
[docs] def setUp(self): from tethne.model.corpus.mallet import LDAModel self.corpus = read(datapath, index_by='wosid') self.corpus.index_feature('abstract', tokenize, structured=True) self.old_model = LDAModel(self.corpus, featureset_name='abstract', nodelete=True) self.old_model.fit(Z=20, max_iter=50)
[docs] def test_mallet_to_theta_featureset(self): from tethne import mallet_to_theta_featureset theta = mallet_to_theta_featureset(self.old_model.dt) self.assertIsInstance(theta, FeatureSet) self.assertEqual(len(theta), len(self.corpus.features['abstract'].features))
[docs] def test_mallet_to_phi_featureset(self): from tethne import mallet_to_phi_featureset phi, vocab = mallet_to_phi_featureset(self.old_model.wt) self.assertIsInstance(phi, FeatureSet) self.assertEqual(len(phi), 20)
[docs]class TestLDAModelExistingOutput(unittest.TestCase):
[docs] def setUp(self): from tethne.model.corpus.mallet import LDAModel self.corpus = read(datapath, index_by='wosid') self.corpus.index_feature('abstract', tokenize, structured=True) self.old_model = LDAModel(self.corpus, featureset_name='abstract', nodelete=True) self.old_model.fit(Z=20, max_iter=50)
[docs] def test_load_existing_data(self): from tethne.model.corpus.mallet import LDAModel new_model = LDAModel(self.corpus, featureset_name='abstract', nodelete=True, prep=False, wt=self.old_model.wt, dt=self.old_model.dt, om=self.old_model.om) new_model.load() self.assertEqual(self.old_model.topics_in(u'WOS:000295037200001'), new_model.topics_in(u'WOS:000295037200001'))
[docs] def test_load_existing_data_staticmethod(self): from tethne.model.corpus.mallet import LDAModel new_model = LDAModel.from_mallet(self.corpus, 'abstract', self.old_model.wt, self.old_model.dt, self.old_model.om) self.assertEqual(self.old_model.topics_in(u'WOS:000295037200001'), new_model.topics_in(u'WOS:000295037200001'))
[docs]class TestLDAModel(unittest.TestCase):
[docs] def setUp(self): from tethne.model.corpus.mallet import LDAModel corpus = read(datapath, index_by='wosid') corpus.index_feature('abstract', tokenize, structured=True) self.model = LDAModel(corpus, featureset_name='abstract') self.model.fit(Z=20, max_iter=500)
[docs] def test_ldamodel(self): dates, rep = self.model.topic_over_time(1) self.assertGreater(sum(rep), 0) self.assertEqual(len(dates), len(rep)) self.assertIsInstance(self.model.phi, FeatureSet) self.assertIsInstance(self.model.theta, FeatureSet) self.assertIsInstance(self.model.list_topics(), list) self.assertGreater(len(self.model.list_topics()), 0) self.assertIsInstance(self.model.list_topic(0), list) self.assertGreater(len(self.model.list_topic(0)), 0)
[docs] def test_networks(self): termGraph = topics.terms(self.model) self.assertGreater(termGraph.size(), 100) self.assertGreater(termGraph.order(), 10) topicGraph = topics.cotopics(self.model) self.assertGreater(topicGraph.size(), 5) self.assertGreater(topicGraph.order(), 0) paperGraph = topics.topic_coupling(self.model) self.assertGreater(paperGraph.size(), 100) self.assertGreater(paperGraph.order(), 20)
[docs]class TestLDAModelUnstructured(unittest.TestCase):
[docs] def setUp(self): from tethne.model.corpus.mallet import LDAModel corpus = read(datapath, index_by='wosid') corpus.index_feature('abstract', tokenize) self.model = LDAModel(corpus, featureset_name='abstract') self.model.fit(Z=20, max_iter=500)
[docs] def test_ldamodel(self): dates, rep = self.model.topic_over_time(1) self.assertGreater(sum(rep), 0) self.assertEqual(len(dates), len(rep)) self.assertIsInstance(self.model.phi, FeatureSet) self.assertIsInstance(self.model.theta, FeatureSet) self.assertIsInstance(self.model.list_topics(), list) self.assertGreater(len(self.model.list_topics()), 0) self.assertIsInstance(self.model.list_topic(0), list) self.assertGreater(len(self.model.list_topic(0)), 0)
[docs] def test_networks(self): termGraph = topics.terms(self.model) self.assertGreater(termGraph.size(), 100) self.assertGreater(termGraph.order(), 10) topicGraph = topics.cotopics(self.model) self.assertGreater(topicGraph.size(), 5) self.assertGreater(topicGraph.order(), 0) paperGraph = topics.topic_coupling(self.model) self.assertGreater(paperGraph.size(), 100) self.assertGreater(paperGraph.order(), 20)
[docs]class TestLDAModelWithTransformation(unittest.TestCase):
[docs] def setUp(self): from tethne.model.corpus.mallet import LDAModel corpus = read(datapath, index_by='wosid') corpus.index_feature('abstract', tokenize) xf = lambda f, c, C, DC: c*3 corpus.features['xf'] = corpus.features['abstract'].transform(xf) self.model = LDAModel(corpus, featureset_name='xf') self.model.fit(Z=20, max_iter=500)
[docs] def test_ldamodel(self): dates, rep = self.model.topic_over_time(1) self.assertGreater(sum(rep), 0) self.assertEqual(len(dates), len(rep)) self.assertIsInstance(self.model.phi, FeatureSet) self.assertIsInstance(self.model.theta, FeatureSet) self.assertIsInstance(self.model.list_topics(), list) self.assertGreater(len(self.model.list_topics()), 0) self.assertIsInstance(self.model.list_topic(0), list) self.assertGreater(len(self.model.list_topic(0)), 0)
[docs] def test_networks(self): termGraph = topics.terms(self.model) self.assertGreater(termGraph.size(), 100) self.assertGreater(termGraph.order(), 10) topicGraph = topics.cotopics(self.model) self.assertGreater(topicGraph.size(), 5) self.assertGreater(topicGraph.order(), 0) paperGraph = topics.topic_coupling(self.model) self.assertGreater(paperGraph.size(), 100) self.assertGreater(paperGraph.order(), 20)
[docs]class TestLDAModelMALLETPath(unittest.TestCase):
[docs] def test_direct_import(self): from tethne import LDAModel corpus = read(datapath, index_by='wosid') corpus.index_feature('abstract', tokenize, structured=True) self.model = LDAModel(corpus, featureset_name='abstract') self.model.fit(Z=20, max_iter=500)
if __name__ == '__main__': unittest.main()