SciPy

Source code for tethne.tests.test_readers_merge

import sys
sys.path.append('../tethne')

import unittest
from tethne.readers import dfr, wos, merge
from tethne import Corpus, Paper, FeatureSet

dfr_datapath = './tethne/tests/data/dfr'
wos_datapath = './tethne/tests/data/wos2.txt'


[docs]class TestMerge(unittest.TestCase):
[docs] def setUp(self): self.dfr_corpus = dfr.read(dfr_datapath) self.wos_corpus = wos.read(wos_datapath)
[docs] def test_merge(self): combined = merge(self.dfr_corpus, self.wos_corpus) self.assertEqual(len(combined), 472) old_features = list(set(self.dfr_corpus.features.keys()) | \ set(self.wos_corpus.features.keys())) self.assertListEqual(combined.features.keys(), old_features)
[docs] def test_merge_comparator(self): """ Instead of passing a list of fields to compare, we can pass a callable object that returns bool. """ comparator = lambda p1, p2: p1.ayjid == p2.ayjid combined = merge(self.dfr_corpus, self.wos_corpus, match_by=comparator) self.assertEqual(len(combined), 472)
[docs] def test_merge_both_empty(self): """ Testing the functionality of merge when both lists passed are empty """ wos_papers = [] wos_corpus = Corpus(wos_papers) dfr_papers = [] dfr_corpus = Corpus(dfr_papers) expected_len = 0 self.assertEqual(expected_len,len(merge(wos_corpus,dfr_corpus)))
[docs] def test_merge_one_empty(self): """ Testing the functionality of merge when one of the lists passed is empty """ wos_papers = [] wos_corpus = Corpus(wos_papers) dfr_papers = [] dfr_paper = Paper() dfr_paper['date'] = 1965 dfr_papers.append(dfr_paper) dfr_corpus = Corpus(dfr_papers) dfr_papers.append(dfr_paper) expected_len = 1 self.assertEqual(expected_len,len(merge(dfr_corpus,wos_corpus))) self.assertEqual(expected_len,len(merge(wos_corpus,dfr_corpus)))
[docs] def test_merge_not_equal(self): """ Testing the functionality of merge by passing two lists with 1 field each and field's values in both are not equal """ wos_papers = [] wos_paper = Paper() wos_paper['date'] = 1999 wos_papers.append(wos_paper) wos_corpus = Corpus(wos_papers) dfr_papers = [] dfr_paper = Paper() dfr_paper['date'] = 1965 dfr_papers.append(dfr_paper) dfr_corpus = Corpus(dfr_papers) result = merge(dfr_corpus,wos_corpus,['date']) expected_len = 2 self.assertEqual(expected_len,len(result))
[docs] def test_merge_equal(self): """ Testing the functionality of merge by passing two lists with 1 field each and field's values in both are equal """ wos_papers = [] wos_paper = Paper() wos_paper['date'] = 1999 wos_papers.append(wos_paper) wos_corpus = Corpus(wos_papers) dfr_papers = [] dfr_paper = Paper() dfr_paper['date'] = 1999 dfr_papers.append(dfr_paper) dfr_corpus = Corpus(dfr_papers) result = merge(dfr_corpus,wos_corpus,['date']) expected_len = 1 self.assertEqual(1999,result[0].__getitem__('date'))
if __name__ == '__main__': unittest.main()