SciPy

Source code for tethne.services.geocode

"""
This module provides classes for geocoding bibliographic data.

Each geocoder class should be based on :class:`.BaseCoder`\, and provide
``code`` and ``get_location`` methods that can be used by 
:func:`BaseCoder.code_this` and :func:`BaseCoder.code_list`\.

:class:`.BaseCoder` should **not** be used directly. Instead, instantiate a
child class, e.g. :class:`.GoogleCoder`\. For example:

.. code-block:: python

   >>> from tethne.services.geocode import GoogleCoder
   >>> google = GoogleCoder()
   >>> location = google.code_this("Marine Biological Laboratory")
   >>> location
   <tethne.services.geocode.Location object at 0x10153af10>

   >>> location.__dict__
   {'latitude': 41.5250098, 'place': u'Marine Biological Laboratory, 7 M B L Street, Woods Hole, MA 02543, USA', 'longitude': -70.6712845}
   
To avoid making redundant and costly requests, :class:`.BaseCoder` implements a
rather crude caching system, using ``pickle``. Previous results are held in
memory until the :class:`.BaseCoder` is destroyed, at which time the
placename-:class:`.Location` mapping is pickled in the current working directory
as ``.geocache.pickle``. Disable by setting ``persistent`` to ``False``.

``sleep_interval`` determines the wait (in seconds) between API calls, to avoid 
triggering rate-limiting.

.. autosummary::

   Location
   BaseCoder
   GoogleCoder
   YahooCoder

"""

from geopy import geocoders
from geopy.exc import GeocoderTimedOut
import time 
import pickle
from ssl import SSLError

import logging
logging.basicConfig()
logger = logging.getLogger(__name__)

class Location(object):
    """
    Minimal geographic datum yielded by geocoders.

    Parameters
    ----------
    place : str or unicode
        Name of the place, as reported by the geocoding service.
    latitude : float
    longitude : float
    **kwargs
        Accepted for call-site convenience and ignored.
    """

    # Class-level defaults; every instance overrides them in __init__.
    place = ""
    latitude = 0.
    longitude = 0.

    def __init__(self, place="", latitude=0., longitude=0., **kwargs):
        self.place, self.latitude, self.longitude = place, latitude, longitude
class BaseCoder(object):
    """
    Base class for geocoders.

    Not meant to be instantiated directly. A subclass must provide
    ``code(placename)``, which queries the remote service, and
    ``get_location(response)``, which turns the service response into a
    :class:`.Location` (or ``None`` when nothing was found).

    Results are memoized in ``self.cache``. When ``persistent`` is true the
    cache is loaded from ``cache_path`` on construction and pickled back to
    it when the instance is destroyed.
    """

    persistent = True       # Triggers on-disk caching with pickle.
    sleep_interval = 0.5    # Avoid rate-limiting. Adjust as desired.
    timeout = 3             # Duration in seconds until timeout.
    max_tries = 3           # How many times to re-try after a timeout.

    # Location of the on-disk cache, relative to the working directory.
    cache_path = ".geocache.pickle"

    # Types accepted as placenames: str/unicode on Python 2, str on Python 3.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, **kwargs):
        # Always provide a cache: code_this() relies on it even when
        # on-disk persistence is switched off.
        self.cache = self._load_cache() if self.persistent else {}

    def __del__(self):
        # Opening the file for writing truncates it, so do nothing unless
        # persistence is enabled and there is a cache to save (there is none
        # if __init__ never ran to completion).
        if not getattr(self, "persistent", False):
            return
        cache = getattr(self, "cache", None)
        if cache is None:
            return
        with open(self.cache_path, "wb") as f:
            pickle.dump(cache, f)

    def _load_cache(self):
        """Return the mapping pickled at ``cache_path``, or ``{}``."""
        # NOTE(security): pickle.load can execute arbitrary code. The cache
        # file is read from the working directory; only run this where that
        # file is trusted.
        try:
            with open(self.cache_path, "rb") as f:
                return pickle.load(f)
        except IOError:
            return {}   # No cache has been written yet.
        except Exception:
            # Unpickling an empty or damaged file can raise many different
            # exception types; a bad cache should not prevent geocoding.
            logger.warning("Could not read geocoder cache {0}; starting "
                           "with an empty cache.".format(self.cache_path))
            return {}

    def code_this(self, placename):
        """
        Retrieve a :class:`.Location` for a placename.

        The cache is consulted first. On a miss the service is queried,
        waiting ``sleep_interval`` seconds before each request and retrying
        up to ``max_tries`` times after a timeout.

        Parameters
        ----------
        placename : str or unicode

        Returns
        -------
        location : :class:`.Location`
            ``None`` if the service found nothing, kept timing out, or
            failed with an unexpected error.

        Raises
        ------
        ValueError
            If ``placename`` is not a string.
        """
        if not isinstance(placename, self._string_types):
            raise ValueError("Encountered non-string in placenames list.")

        try:    # Check the cache first.
            return self.cache[placename]
        except KeyError:
            pass    # Not in the cache, call the service.

        retries = 0
        while True:
            time.sleep(self.sleep_interval)     # Avoid rate-limiting.
            try:
                location = self.get_location(self.code(placename))
            except (GeocoderTimedOut, SSLError):
                logger.warning("Geocoder timed out for {0}. Retrying."
                               .format(placename))
                if retries >= self.max_tries:
                    logger.warning("Geocoder gave up for {0}."
                                   .format(placename))
                    return None
                retries += 1
            except Exception:
                # Retrying an error that is not a timeout would loop
                # forever; report it and give up on this placename.
                logger.exception("Geocoder failed for {0}."
                                 .format(placename))
                return None
            else:
                self.cache[placename] = location
                return location

    def code_list(self, placenames):
        """
        Retrieve :class:`.Location` for a list of placenames.

        Parameters
        ----------
        placenames : list

        Returns
        -------
        locations : dict
            Placename - :class:`.Location` mapping.
        """
        locations = {}
        for name in placenames:
            locations[name] = self.code_this(name)
        return locations
class GoogleCoder(BaseCoder):
    """
    Geocoder backed by the Google Geocoding API, accessed through the
    ``geopy.geocoders.GoogleV3`` coder.
    """

    # Created once, when the class is defined, and shared by all instances.
    coder = geocoders.GoogleV3(timeout=3)
    code = coder.geocode

    def get_location(self, response):
        """
        Build a :class:`.Location` from a Google Geocoding API response.

        Parameters
        ----------
        response : tuple
            GoogleV3 geocoder response: (u'Name', (Lat, Lon))

        Returns
        -------
        location : :class:`.Location`
            ``None`` when ``response`` is ``None``.
        """
        if response is not None:
            name = response[0]
            coordinates = response[1]
            return Location(place=name,
                            latitude=coordinates[0],
                            longitude=coordinates[1])
        return None
class YahooCoder(BaseCoder):
    """
    Uses the Yahoo PlaceMaker API.

    Parameters
    ----------
    yahoo_id : str
        Application id sent as the ``appid`` query parameter.
    **kwargs
        Passed on to :class:`.BaseCoder`\.
    """

    yahoo_base = "http://where.yahooapis.com/v1/places"

    # ElementTree search paths, qualified with the response's XML namespace.
    lat_searchpath = ".//{http://where.yahooapis.com/v1/schema.rng}centroid/" +\
                     "{http://where.yahooapis.com/v1/schema.rng}latitude"
    lon_searchpath = ".//{http://where.yahooapis.com/v1/schema.rng}centroid/" +\
                     "{http://where.yahooapis.com/v1/schema.rng}longitude"
    name_searchpath = ".//{http://where.yahooapis.com/v1/schema.rng}name"

    def __init__(self, yahoo_id, **kwargs):
        self.yahoo_id = yahoo_id
        # super() already binds self; passing it again would hand the base
        # initializer an unexpected positional argument.
        super(YahooCoder, self).__init__(**kwargs)

    def code(self, name):
        """
        Constructs and sends a Yahoo PlaceMaker API query.

        Parameters
        ----------
        name : string

        Returns
        -------
        response : str
            Raw body of the HTTP response (XML).
        """
        from contextlib import closing
        try:
            from urllib2 import quote, urlopen          # Python 2
        except ImportError:
            from urllib.parse import quote              # Python 3
            from urllib.request import urlopen

        # Python 2's quote() cannot handle non-ASCII unicode objects, so
        # hand it UTF-8 bytes instead. On Python 3 this branch is not taken
        # for str or bytes input.
        if not isinstance(name, (str, bytes)):
            name = name.encode("utf-8")

        rpath = "{0}.q('{1}')?appid={2}".format(self.yahoo_base,
                                                quote(name),
                                                self.yahoo_id)
        # closing() releases the connection even if read() raises.
        with closing(urlopen(rpath)) as response:
            return response.read()

    def get_location(self, response):
        """
        Yields :class:`.Location` based on a response from Yahoo PlaceMaker
        API.

        Parameters
        ----------
        response : str
            XML document returned by :meth:`code`\.

        Returns
        -------
        location : :class:`.Location`
            ``None`` if the response cannot be parsed or lacks a name,
            latitude or longitude.
        """
        import xml.etree.ElementTree as ET

        try:
            rx = ET.fromstring(response)
        except ET.ParseError:
            logger.warning("Could not parse Yahoo PlaceMaker response.")
            return None

        # find() returns the first match, or None when there is none.
        lat_node = rx.find(self.lat_searchpath)
        lon_node = rx.find(self.lon_searchpath)
        name_node = rx.find(self.name_searchpath)
        if lat_node is None or lon_node is None or name_node is None:
            return None     # Nothing found.

        try:
            lat = float(lat_node.text)
            lon = float(lon_node.text)
        except (TypeError, ValueError):
            return None     # Coordinates are empty or not numeric.

        # The name is passed through unconverted: str() would fail on
        # non-ASCII place names under Python 2.
        return Location(place=name_node.text or "", latitude=lat,
                        longitude=lon)