"""
A :class:`.GraphCollection` is a set of graphs generated from a
:class:`.Corpus` or model.
"""
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel('INFO')
import networkx
import numpy as np
import matplotlib.pyplot as plt
import warnings
from .. import networks as nt
[docs]class GraphCollection(object):
"""
A :class:`.GraphCollection` is an indexed set of ``networkx.Graph``
objects generated from a :class:`.Corpus` or model.
A :class:`.GraphCollection` can be instantiated without any data.
.. code-block:: python
>>> from tethne import GraphCollection
>>> G = GraphCollection()
When you add a :class:`networkx.Graph` to the :class:`.GraphCollection`\,
all of the nodes are indexed and the graph is recast using integer IDs.
This means that node IDs are consistent among all of the graphs in the
collection.
.. code-block:: python
>>> import networkx
>>> g = networkx.Graph()
>>> g.add_edge('Bob', 'Joe')
>>> g.add_edge('Bob', 'Jane')
>>> from tethne import GraphCollection
>>> G = GraphCollection()
>>> G[1950] = g
>>> print G[1950].nodes(data=True)
[(0, {'label': 'Jane'}), (1, {'label': 'Bob'}), (2, {'label': 'Joe'})]
Note that the original node names have been retained in the `label`
attribute.
You can also generate a :class:`.GraphCollection` directly from a
:class:`.Corpus` using the :func:`GraphCollection.build` method.
"""
def __init__(self):
self.graphs = {}
self.edge_list = []
self.node_index = {}
self.node_lookup = {} # Reverse index.
return
def __setitem__(self, index, graph):
"""
Add a :class:`.Graph` to the :class:`.GraphCollection`
Parameters
----------
index
This can be anything used to refer to the graph.
graph : :class:`.networkx.classes.graph.Graph`
Raises
------
ValueError : Graph must be of type networkx.classes.graph.Graph
If value is not a Graph.
"""
self._index_graph(index, graph)
def _index_graph(self, index, graph):
"""
Labels nodes with integer indices used across all graphs.
"""
graph_ = networkx.Graph()
# Index nodes, and add to new graph.
for node in graph.nodes(data=True):
if node[0] in self.node_lookup:
n = self.node_lookup[node[0]]
else:
try:
n = max(self.node_index.keys()) + 1 # Get an unused key.
except ValueError: # node_index is empty.
n = 0
self.node_index[n] = node[0]
self.node_lookup[node[0]] = n
node[1]['label'] = node[0] # Keep label.
graph_.add_node(n, node[1]) # Include node attributes.
for edge in graph.edges(data=True):
n_i = self.node_lookup[edge[0]] # Already indexed all nodes.
n_j = self.node_lookup[edge[1]]
self.edge_list.append((n_i, n_j))
graph_.add_edge(n_i, n_j, edge[2]) # Include edge attributes.
self.graphs[index] = graph_
def __getitem__(self, key):
return self.graphs[key]
def __delitem__(self, key):
del self.graphs[key]
def __len__(self):
return len(self.graphs)
[docs] def build(self, corpus, axis, node_type, graph_type, method_kwargs={},
**kwargs):
"""
Generates a graphs directly from data in a :class:`.Corpus`.
The :mod:`.networks` module contains graph-building methods for
:mod:`.authors`, :mod:`.papers`, :mod:`.features`, and :mod:`.topics`\.
Choose a method from one of these modules by specifying the module name
in ``node_type`` and the method name in ``graph_type``. That method will
be applied to each slice in the :class:`.Corpus`\, ``MyCorpus``, along
the specified ``axis``.
To build a coauthorship network from a :class:`.Corpus`
(already sliced by 'date'):
.. code-block:: python
>>> from tethne import GraphCollection
>>> G = GraphCollection().build(MyCorpus, 'date', 'authors', 'coauthors')
>>> G.graphs
{1921: <networkx.classes.graph.Graph at 0x10b2692d0>,
1926: <networkx.classes.graph.Graph at 0x10b269c50>,
1931: <networkx.classes.graph.Graph at 0x10b269c10>,
1936: <networkx.classes.graph.Graph at 0x10b2695d0>,
1941: <networkx.classes.graph.Graph at 0x10b269dd0>,
1946: <networkx.classes.graph.Graph at 0x10a88bb90>,
1951: <networkx.classes.graph.Graph at 0x10a88b0d0>,
1956: <networkx.classes.graph.Graph at 0x10b269a50>,
1961: <networkx.classes.graph.Graph at 0x10b269b50>,
1966: <networkx.classes.graph.Graph at 0x10b269790>,
1971: <networkx.classes.graph.Graph at 0x10b269d50>,
1976: <networkx.classes.graph.Graph at 0x10a88bed0>}
Parameters
----------
D : :class:`.Corpus`
Must already be sliced by ``axis``.
axis : str
Name of slice axis to use in generating graphs.
node_type : str
Name of a graph-building module in :mod:`.networks`\.
graph_type : str
Name of a method in the module indicated by ``node_type``.
method_kwargs : dict
Kwargs to pass to ``graph_type`` method.
Returns
-------
self : :class:`.GraphCollection`
"""
method = nt.__dict__[node_type].__dict__[graph_type]
for key, data in corpus.get_slices('date', papers=True).iteritems():
# Include sliced fields as node attributes.
method_kwargs['node_attribs'] = corpus.get_axes()
method_kwargs['node_id'] = corpus.index_by
self[key] = method(data, **method_kwargs)
return self
[docs] def nodes(self):
"""
Get the complete set of nodes for this :class:`.GraphCollection`\.
Returns
-------
nodes : list
Complete list of unique node indices for this
:class:`.GraphCollection`\.
Examples
--------
.. code-block:: python
>>> G.nodes()
[0,
1,
2,
3,
4,
.
.
233]
"""
return self.node_index.keys()
[docs] def edges(self, overwrite=False): # [#61512528]
"""
Get the complete set of edges for this :class:`.GraphCollection` .
Parameters
----------
overwrite : bool
If True, will generate new node list, even if one already exists.
Returns
-------
edges : list
List (complete set) of edges for this :class:`.GraphCollection` .
Examples
--------
.. code-block:: python
>>> G.edges()
[(131, 143),
(183, 222),
(54, 55),
(64, 51),
(54, 58),
.
.
(53, 56)]
"""
# TODO: is there a way to simplify this?
if len(self.edge_list) == 0 or overwrite :
edges = set([])
for G in self.graphs.values():
edges = edges | set(G.edges())
self.edge_list = list(edges)
return self.edge_list
def _plot(self, data, ylabel, type='bar', fig=None, plotargs={}, **kwargs):
"""
Parameters
----------
data : tuple
( xvalues, yvalues )
type : str
'plot' or 'bar'
fig : :class:`matplotlib.figure.figure`
If provided, will use this as the basis for the plot. Otherwise,
will generate a new :class:`matplotlib.figure.Figure`\.
plotargs
Passed to PyPlot method.
Returns
-------
fig : :class:`matplotlib.figure.Figure`
"""
xvalues, yvalues = data
if fig is None:
fig = plt.figure(figsize=(10,5))
plt.__dict__[type](xvalues, yvalues, **plotargs)
plt.xlim(np.min(xvalues), np.max(xvalues))
plt.ylabel(ylabel)
return fig
[docs] def node_distribution(self):
"""
Get the number of nodes for each :class:`networkx.Graph` in the
:class:`.GraphCollection`\.
Returns
-------
keys : list
Graph indices.
values : list
Number of nodes in each graph.
Examples
--------
.. code-block:: python
>>> keys, nodes = G.node_distribution()
>>> print keys
[1921, 1926, 1931, 1936, 1941, 1946, 1951, 1956, 1961, 1966, 1971]
>>> print nodes
[0, 2, 16, 8, 2, 5, 14, 16, 33, 60, 44]
"""
keys = sorted(self.graphs.keys())
values = [ len(self[k].nodes()) for k in keys ]
return keys, values
[docs] def plot_node_distribution(self, type='bar', fig=None, plotargs={},
**kwargs):
"""
Plot the values of :func:`.node_distribution` using `MatPlotLib
<http://matplotlib.org>`_.
Parameters
----------
type : str
'plot' or 'bar'
plotargs
Passed to PyPlot method.
Returns
-------
fig : :class:`matplotlib.figure.figure`
Examples
--------
.. code-block:: python
>>> fig = G.plot_node_distribution()
...should generate a plot that looks like:
.. figure:: _static/images/graph_plot_distribution.png
:width: 400
:align: center
"""
data = self.node_distribution()
fig = self._plot(data, 'Nodes', type, fig, plotargs, **kwargs)
return fig
[docs] def edge_distribution(self):
"""
Get the number of edges in each :class:`networkx.Graph` in the
:class:`.GraphCollection`\.
Returns
-------
keys : list
Graph indices.
values : list
Number of nodes in each :class:`.Graph`
Examples
--------
.. code-block:: python
>>> keys, edges = G.edge_distribution()
>>> print keys
[1921, 1926, 1931, 1936, 1941, 1946, 1951, 1956, 1961, 1966, 1971]
>>> print edges
[0, 1, 108, 7, 1, 4, 16, 17, 29, 42, 112]
"""
keys = sorted(self.graphs.keys())
values = [ len(self[k].edges()) for k in keys ]
return keys, values
[docs] def plot_edge_distribution(self, type='bar', fig=None, plotargs={},
**kwargs):
"""
Plot :func:`GraphCollection.edge_distribution` using `MatPlotLib
<http://matplotlib.org>`_.
Parameters
----------
type : str
'plot' or 'bar'
plotargs
Passed to PyPlot method.
Returns
-------
fig : :class:`matplotlib.figure.figure`
Examples
--------
.. code-block:: python
>>> fig = G.plot_edge_distribution()
...should generate a plot that looks like:
.. figure:: _static/images/graph_plot_edge_distribution.png
:width: 400
:align: center
"""
data = self.edge_distribution()
fig = self._plot(data, 'Edges', type, fig, plotargs, **kwargs)
return fig
[docs] def attr_distribution(self, attr='weight', etype='edge', stat=np.mean):
"""
Generate summary statistics for a node or edge attribute across all
of the :class:`networkx.Graph`\s in the :class:`.GraphCollection`\.
Parameters
----------
attr : str
Attribute name.
etype : str
'node' or 'edge'
stat : method
Method to apply to the values in each :class:`.Graph`
Return
-------
keys : list
Graph indices.
values : list
Statistic values for each :class:`.Graph`
Examples
--------
To get the mean edge weight for each graph...
.. code-block:: python
>>> import numpy
>>> keys, means = G.attr_distribution('weight', 'edge', numpy.mean)
>>> print keys
[1921, 1926, 1931, 1936, 1941, 1946, 1951, 1956, 1961, 1966, 1971, 1976]
>>> print means
[0.0, 1.0, 1.1388888888888888, 1.1428571428571428, 4.0, 1.25, 1.0, 1.0, 1.0344827586206897, 1.2142857142857142, 1.0089285714285714, 1.2]
"""
keys = sorted(self.graphs.keys())
values = []
for k in keys:
A = networkx.__dict__['get_{0}_attributes'.format(etype)](self[k], attr).values()
# Ignore warnings; will handle NaNs below.
with warnings.catch_warnings():
warnings.simplefilter('ignore')
v = stat(A)
if np.isnan(v):
v = 0.
values.append(v)
return keys, values
[docs] def plot_attr_distribution(self, attr='weight', etype='edge', stat=np.mean,
type='bar', fig=None, plotargs={}, **kwargs):
"""
Plot :func:`GraphCollection.attr_distribution` using MatPlotLib.
Parameters
----------
attr : str
Attribute name.
etype : str
'node' or 'edge'
stat : method
Method to apply to the values in each :class:`.Graph`
type : str
'plot' or 'bar'
plotargs
Passed to PyPlot method.
Returns
-------
fig : :class:`matplotlib.figure.figure`
Examples
--------
.. code-block:: python
>>> import numpy
>>> G.plot_attr_distribution('weight', 'edge', numpy.mean, fig=fig)
...should generate a plot that looks something like:
.. figure:: _static/images/graph_plot_attr_distribution.png
:width: 400
:align: center
"""
data = self.attr_distribution(attr, etype, stat)
ylabel = ' '.join([stat.__name__, etype, attr])
fig = self._plot(data, ylabel, type, fig, plotargs, **kwargs)
return fig
### TODO: This should probably go away. ###
# def save(self, filepath): #[61512528]
# """
# Pickles (serializes) the :class:`.GraphCollection`\.
#
# .. code-block:: python
#
# >>> G.save('/path/to/archive.pickle')
#
# Parameters
# ----------
# filepath :
# Full path of output file.
#
# Raises
# -------
# PicklingError : Raised when unpicklable objects are Pickled.
# IOError : File does not exist, or cannot be opened.
# """
#
#
# # Try block if the filename is present or not.
# try:
# with open(filepath,'wb') as output:
# try:
# pk.dump(self, output)
# except PicklingError: # Handle the Prickling error.
# raise PicklingError \
# ("Pickling error: The object cannot be pickled")
# except IOError: # File does not exist, or couldn't be read.
# raise IOError("File does not exist, or cannot be opened.")
#
#
# def load(self, filepath): #[61512528]
# """
# Loads a pickled (serialized) :class:`.GraphCollection` from filepath.
#
# .. code-block:: python
#
# >>> G = GraphCollection().load('/path/to/archive.pickle')
#
# Parameters
# ----------
# filepath : string
# Full path to pickled :class:`.GraphCollection` .
#
# Raises
# -------
# UnpicklingError : Raised when there is some issue in unpickling.
# IOError : File does not exist, or cannot be read.
# """
#
# # Handle NameError File not found.
# try:
# with open(filepath,'rb') as input: #reading in binary mode
# try:
# obj_read = pk.load(input)
# except UnpicklingError: # Handle unprickling error.
# raise UnpicklingError \
# ("UnPickling error: The object cannot be found")
#
#
# except IOError: # File does not exist, or couldn't be read.
# raise IOError("File does not exist, or cannot be read.")
#
# # Preserving the object with unpickled data
# if(obj_read):
# self.__dict__ = obj_read.__dict__
#
# return obj_read
[docs] def compose(self):
"""
Returns the simple union of all the ``networkx.Graph``s in the
:class:`.GraphCollection`\.
Returns
-------
composed : :class:`.Graph`
Simple union of all ``networkx.Graph``s in the
:class:`.GraphCollection`\.
Examples
--------
.. code-block:: python
>>> g = G.compose()
>>> g
<networkx.classes.graph.Graph at 0x10bfac710>
Notes
-----
Node or edge attributes that vary over slices should be ignored.
"""
composed = networkx.Graph()
for k, G in self.graphs.iteritems():
composed = networkx.compose(composed, G)
return composed
[docs] def node_history(self, node, attribute):
"""
Returns a dictionary of attribute values for each ``networkx.Graph`` in
the :class:`.GraphCollection` for a single node.
Parameters
----------
node : str
The node of interest.
attribute : str
The attribute of interest; e.g. 'betweenness_centrality'
Returns
-------
history : dict
"""
history = {}
keys = sorted(self.graphs.keys())
for k in keys:
G = self.graphs[k]
asdict = { v[0]:v[1] for v in G.nodes(data=True) }
try:
history[k] = asdict[node][attribute]
except KeyError:
pass # No such node attribute in graph.
return history
[docs] def edge_history(self, source, target, attribute):
"""
Returns a dictionary of attribute vales for each Graph in the
:class:`.GraphCollection` for a single edge.
Parameters
----------
source : str
Identifier for source node.
target : str
Identifier for target node.
attribute : str
The attribute of interest; e.g. 'betweenness_centrality'
Returns
-------
history : dict
"""
history = {}
keys = sorted(self.graphs.keys())
for k in keys:
G = self.graphs[k]
try:
attributes = G[source][target]
try:
history[k] = attributes[attribute]
except KeyError:
pass # No such attribute for edge in Graph.
except KeyError:
pass # No such edge in graph.
return history