SciPy

Source code for tethne.networks.helpers

"""
Helper functions for generating networks.

.. autosummary::
   :nosignatures:

   citation_count
   simplify_multigraph
   top_cited
   top_parents

"""

import networkx as nx
import operator
import numpy as np
#import tethne.utilities as util  # Pylint Warnings
#import tethne.data as ds

from collections import Counter

import sys
PYTHON_3 = sys.version_info[0] == 3
if PYTHON_3:
    unicode = str


[docs]def simplify_multigraph(multigraph, time=False): """ Simplifies a graph by condensing multiple edges between the same node pair into a single edge, with a weight attribute equal to the number of edges. Parameters ---------- graph : networkx.MultiGraph E.g. a coauthorship graph. time : bool If True, will generate 'start' and 'end' attributes for each edge, corresponding to the earliest and latest 'date' values for that edge. Returns ------- graph : networkx.Graph A NetworkX :class:`.graph` . """ graph = nx.Graph() for node in multigraph.nodes(data=True): u = node[0] node_attribs = node[1] graph.add_node(u, node_attribs) for v in multigraph[u]: edges = multigraph.get_edge_data(u, v) # Dict. edge_attribs = { 'weight': len(edges) } if time: # Look for a date in each edge. start = 3000 end = 0 found_date = False for edge in edges.values(): try: found_date = True if edge['date'] < start: start = edge['date'] if edge['date'] > end: end = edge['date'] except KeyError: # No date to be found. pass if found_date: # If no date found, don't add start/end atts. edge_attribs['start'] = start edge_attribs['end'] = end graph.add_edge(u, v, edge_attribs) return graph
[docs]def citation_count(papers, key='ayjid', verbose=False): """ Generates citation counts for all of the papers cited by papers. Parameters ---------- papers : list A list of :class:`.Paper` instances. key : str Property to use as node key. Default is 'ayjid' (recommended). verbose : bool If True, prints status messages. Returns ------- counts : dict Citation counts for all papers cited by papers. """ if verbose: print "Generating citation counts for "+unicode(len(papers))+" papers..." counts = Counter() for P in papers: if P['citations'] is not None: for p in P['citations']: counts[p[key]] += 1 return counts
[docs]def top_cited(papers, topn=20, verbose=False): """ Generates a list of the topn (or topn%) most cited papers. Parameters ---------- papers : list A list of :class:`.Paper` instances. topn : int or float {0.-1.} Number (int) or percentage (float) of top-cited papers to return. verbose : bool If True, prints status messages. Returns ------- top : list A list of 'ayjid' keys for the topn most cited papers. counts : dict Citation counts for all papers cited by papers. """ if verbose: print "Finding top "+unicode(topn)+" most cited papers..." counts = citation_count(papers, verbose=verbose) if type(topn) is int: n = topn elif type(topn) is float: n = int(np.around(topn*len(counts), decimals=-1)) top = dict(sorted(counts.items(), key=operator.itemgetter(1), reverse=True)[:n]).keys() return top, counts
[docs]def top_parents(papers, topn=20, verbose=False): """ Returns a list of :class:`.Paper` that cite the topn most cited papers. Parameters ---------- papers : list A list of :class:`.Paper` objects. topn : int or float {0.-1.} Number (int) or percentage (float) of top-cited papers. verbose : bool If True, prints status messages. Returns ------- papers : list A list of :class:`.Paper` objects. top : list A list of 'ayjid' keys for the topn most cited papers. counts : dict Citation counts for all papers cited by papers. """ if verbose: print "Getting parents of top "+unicode(topn)+" most cited papers." top, counts = top_cited(papers, topn=topn, verbose=verbose) papers = [ P for P in papers if P['citations'] is not None ] parents = [ P for P in papers if len( set([ c['ayjid'] for c in P['citations'] ]) & set(top) ) > 0 ] if verbose: print "Found " + unicode(len(parents)) + " parents." return parents, top, counts