Initial commit

2014-11-17 01:03:10 +01:00 · 2014-11-17 01:03:10 +01:00 · e91356c0c1
parent bb3591fa17
commit e91356c0c1
2 changed files with 258 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -1,6 +1,86 @@
 these-3-words
 =============

-Address any 3x3m square on earth with a unique three word name.
+Address any 3x3meter square on earth with a unique three word name.

 Inspired by http://what3words.com/
+
+
+example
+=======
+
+    >>> import thesethreewords as these
+
+    # the home of particle physics
+    >>> CERN = (46.232355, 6.055419)
+
+    >>> three = these.three_words(CERN)
+    >>> print three
+    'engirt-aleutic-canun'
+    >>> these.decode(three)
+    (46.232335567474365, 6.055419445037842)
+
+Check out where this is on [google maps][cernmap].
+
+
+requirements
+============
+
+You need to install the [geohash][geohash] library:
+
+    $ pip install geohash
+
+
+six words
+=========
+
+There are a lot of 3x3m squares on the earth's surface. Encoding
+them in only three words requires a long wordlist, and as a result
+some fairly obscure words end up on it. If you can live with
+having to remember six words, the wordlist is much shorter.
+The six word wordlist comes from the amazing [humanhash][humanhash]
+library. Words were chosen to maximise clarity in human
+communication.
+
+    >>> six = these.six_words(CERN)
+    >>> print six
+    'spaghetti-carolina-kentucky-oscar-iowa-table'
+    >>> these.decode(six)
+    (46.232335567474365, 6.055419445037842)
+
+
+how it works
+============
+
+Each latitude/longitude pair is converted to a nine
+character geohash. This provides about 3 meter
+resolution at all latitudes. The geohash is then
+converted to an integer which is encoded as a string
+of words.
+
+The wordlist used to encode the `geohash` into just
+three words comes from your local computer's dictionary. Some
+attempts are made to remove really obscure words but
+it could be better. You need to use the same wordlist
+when encoding and decoding a `these-3-words` hash.
+
+The `these-3-words` hash shares the
+property of a `geohash` that nearby locations
+have similar `these-3-words` hashes:
+
+    >>> other_CERN_site = (46.256811, 6.056792)
+    >>> six = these.six_words(other_CERN_site)
+    >>> print six
+    'spaghetti-carolina-kentucky-utah-seventeen-neptune'
+    >>> these.decode(six)
+    (46.256797313690186, 6.056792736053467)
+
+The other CERN site is [here][othercernmap].
+
+this is a [@betatim][betatim] kind of idea
+
+[humanhash]: https://github.com/zacharyvoase/humanhash
+[geohash]: https://code.google.com/p/python-geohash/
+[cernmap]: https://www.google.ch/maps/place/46%C2%B013'56.4%22N+6%C2%B003'19.5%22E/@46.2323356,6.0554194,17z/data=!3m1!4b1!4m2!3m1!1s0x0:0x0
+[othercernmap]: https://www.google.ch/maps/place/46%C2%B015'24.5%22N+6%C2%B003'24.4%22E/@46.256811,6.056792,14z/data=!4m2!3m1!1s0x0:0x0
+[betatim]: https://twitter.com/betatim
--- a/thesethreewords.py
+++ b/thesethreewords.py
@ -0,0 +1,177 @@
+import random
+
+import geohash
+
+
+def get_random_words(path="/usr/share/dict/words", count=2**15, seed=3346346):
+    """Build the shuffled word list used for the three word encoding.
+
+    Reads `path` (one word per line), keeps words of five to seven
+    characters, lower-cases them and drops repeats. The first `count`
+    remaining words are shuffled with a fixed `seed`, so the order is
+    reproducible for a given dictionary file.
+
+    Returns a list of exactly `count` distinct lower-case words.
+    Raises RuntimeError if the dictionary has fewer usable words.
+    """
+    useful = []
+    seen = set()
+    # `with` closes the file even if reading fails part way through
+    with open(path) as words:
+        for w in words:
+            w = w.strip().lower()
+            # Lower-casing can turn two dictionary entries into the same
+            # word ("March"/"march"). A repeated word could be encoded
+            # from its second position but list.index() would decode it
+            # to its first, so only the first occurrence is kept.
+            if 5 <= len(w) < 8 and w not in seen:
+                seen.add(w)
+                useful.append(w)
+
+    useful = useful[:count]
+    # An explicit check instead of `assert`, which is skipped under -O
+    if len(useful) != count:
+        raise RuntimeError("Need %i words of 5 to 7 letters but %s only has %i."
+                           % (count, path, len(useful)))
+
+    # NOTE: this reseeds the module-level generator, as the original
+    # code did; the shuffle order depends on it.
+    random.seed(seed)
+    random.shuffle(useful)
+    return useful
+# Built once at import time. three_words() picks words from this list by
+# position and decode() looks the words up in it again.
+RANDOM_WORDLIST = get_random_words()
+
+# Human friendly word list, taken directly from the humanhash project.
+# six_words() picks a word by byte value (0-255) and decode() maps a word
+# back to its position, so the order of the entries must not change.
+HUMAN_WORDLIST = (
+        'ack', 'alabama', 'alanine', 'alaska', 'alpha', 'angel', 'apart', 'april',
+        'arizona', 'arkansas', 'artist', 'asparagus', 'aspen', 'august', 'autumn',
+        'avocado', 'bacon', 'bakerloo', 'batman', 'beer', 'berlin', 'beryllium',
+        'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger',
+        'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat',
+        'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado',
+        'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware',
+        'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo',
+        'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal',
+        'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix',
+        'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie',
+        'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey',
+        'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel',
+        'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa',
+        'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter',
+        'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp',
+        'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana',
+        'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars',
+        'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike',
+        'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird',
+        'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska',
+        'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north',
+        'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange',
+        'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta',
+        'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple',
+        'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo',
+        'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen',
+        'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake',
+        'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring',
+        'stairway', 'steak', 'stream', 'summer', 'sweet', 'table', 'tango', 'ten',
+        'tennessee', 'tennis', 'texas', 'thirteen', 'three', 'timing', 'triple',
+        'twelve', 'twenty', 'two', 'uncle', 'undress', 'uniform', 'uranus', 'utah',
+        'vegan', 'venus', 'vermont', 'victor', 'video', 'violet', 'virginia',
+        'washington', 'west', 'whiskey', 'white', 'william', 'winner', 'winter',
+        'wisconsin', 'wolfram', 'wyoming', 'xray', 'yankee', 'yellow', 'zebra',
+        'zulu')
+
+
+class WordHasher(object):
+    """Convert latitude and longitudes into human readable strings.
+
+    A coordinate is turned into a nine character geohash and the geohash
+    into a 45bit integer (five bits per character). The integer is
+    written either as three words of 15bits each from `RANDOM_WORDLIST`
+    or, after padding to 48bits, as six words of 8bits each from
+    `HUMAN_WORDLIST`.
+    """
+
+    def __init__(self):
+        """Set up the lookup tables for the geohash alphabet."""
+        self._symbols = "0123456789bcdefghjkmnpqrstuvwxyz"
+        self._decode_symbols = dict((ch, i) for (i, ch) in enumerate(self._symbols))
+        self._encode_symbols = dict((i, ch) for (i, ch) in enumerate(self._symbols))
+
+    def three_words(self, lat_lon):
+        """Convert coordinate to a combination of three words
+
+        The coordinate is a `(latitude, longitude)` pair in degrees.
+        Returns the three words joined by "-".
+        """
+        # Unpacked here because tuple parameters in the signature are
+        # a syntax error on Python 3.
+        lat, lon = lat_lon
+        gh = geohash.encode(lat, lon, 9)
+        words = "-".join(RANDOM_WORDLIST[p] for p in self.to_rugbits(self.geo_to_int(gh)))
+        return words
+
+    def six_words(self, lat_lon):
+        """Convert coordinate to a combination of six words
+
+        The coordinate is a `(latitude, longitude)` pair in degrees.
+        Returns the six words joined by "-".
+
+        With six words the word list contains only words
+        which are short, easy to pronounce and easy to distinguish.
+        """
+        lat, lon = lat_lon
+        gh = geohash.encode(lat, lon, 9)
+        words = "-".join(HUMAN_WORDLIST[p] for p in self.to_bytes(self.pad(gh)))
+        return words
+
+    def decode(self, words):
+        """Decode words back to latitude and longitude
+
+        `words` is a "-" separated string of three or six words as
+        produced by `three_words` or `six_words`. Raises RuntimeError
+        for any other number of words and ValueError (from `index`)
+        for a word that is not in the word list.
+        """
+        words = words.split("-")
+        if len(words) == 3:
+            i = self.rugbits_to_int([RANDOM_WORDLIST.index(w) for w in words])
+
+        elif len(words) == 6:
+            i = self.bytes_to_int([HUMAN_WORDLIST.index(w) for w in words])
+            i = self.unpad(i)
+
+        else:
+            raise RuntimeError("Do not know how to decode set of %i words."%(len(words)))
+
+        # Ask for all nine characters: a geohash that starts with "0"
+        # would otherwise come back shortened and decode to another cell.
+        geo_hash = self.int_to_geo(i, 9)
+        return geohash.decode(geo_hash)
+
+    def geo_to_int(self, geo_hash):
+        """Decode `geo_hash` to an integer"""
+        base = len(self._symbols)
+        number = 0
+        for symbol in geo_hash:
+            number = number*base + self._decode_symbols[symbol]
+
+        return number
+
+    def int_to_geo(self, integer, length=None):
+        """Encode `integer` to a geo hash
+
+        Without `length` the shortest representation is returned (the
+        empty string for zero). With `length` the result is left padded
+        with the zero symbol to at least that many characters, which is
+        what makes `geo_to_int` reversible for hashes starting with "0".
+        """
+        base = len(self._symbols)
+        symbols = []
+        while integer > 0:
+            integer, remainder = divmod(integer, base)
+            symbols.append(self._encode_symbols[remainder])
+
+        geo_hash = ''.join(reversed(symbols))
+        if length is not None:
+            geo_hash = geo_hash.rjust(length, self._symbols[0])
+        return geo_hash
+
+    def pad(self, geo_hash):
+        """Pad nine character `geo_hash` to 48bit integer"""
+        assert len(geo_hash) == 9
+        # three zero bits are appended at the low end
+        return self.geo_to_int(geo_hash) << 3
+
+    def unpad(self, integer):
+        """Remove 3bit of padding to get 45bit geo hash"""
+        return integer >> 3
+
+    def to_bytes(self, integer):
+        """Convert a 48bit `integer` to a list of 6bytes
+
+        The most significant byte comes first.
+        """
+        # Shifts instead of `/`, which is float division on Python 3
+        return [(integer >> shift) & 0b11111111 for shift in range(40, -8, -8)]
+
+    def bytes_to_int(self, bytes):
+        """Convert a list of 6`bytes` to an integer
+
+        The first entry is the most significant byte.
+        """
+        assert len(bytes) == 6
+        number = 0
+        for b in bytes:
+            number = (number << 8) | b
+        return number
+
+    def to_rugbits(self, integer):
+        """Convert a 45bit `integer` to a list of 3rugbits
+
+        A rugbit is like a byte but with 15bits instead of eight.
+        The most significant rugbit comes first.
+        """
+        fifteen_bits = 0b111111111111111
+        return [(integer >> 30) & fifteen_bits,
+                (integer >> 15) & fifteen_bits,
+                integer & fifteen_bits]
+
+    def rugbits_to_int(self, rugbits):
+        """Convert a list of `rugbits` to an integer"""
+        return (rugbits[0] << 30) + (rugbits[1] << 15) + rugbits[2]
+
+
+# Module-level shortcuts bound to one shared hasher, so the functions
+# can be called straight from the module without building a WordHasher.
+DEFAULT_HASHER = WordHasher()
+three_words, six_words, decode = (
+    DEFAULT_HASHER.three_words,
+    DEFAULT_HASHER.six_words,
+    DEFAULT_HASHER.decode,
+)