Initial commit

This commit is contained in:
Tim Head 2014-11-17 01:03:10 +01:00
parent bb3591fa17
commit e91356c0c1
2 changed files with 258 additions and 1 deletions

View File

@ -1,6 +1,86 @@
these-3-words
=============
Address any 3x3m square on earth with a unique three word name.
Address any 3x3meter square on earth with a unique three word name.
Inspired by http://what3words.com/
example
=======
>>> import thesethreewords as these
# the home of particle physics
>>> CERN = (46.232355, 6.055419)
>>> three = these.three_words(CERN)
>>> print three
'engirt-aleutic-canun'
>>> these.decode(three)
(46.232335567474365, 6.055419445037842)
Check out where this is on [google maps][cernmap].
requirements
============
You need to install the [geohash][geohash] library:
$ pip install geohash
six words
=========
There are a lot of 3x3m squares on the earth's surface. To encode
them in only three words requires a long wordlist, as a result
some fairly obscure words get on it. If you can live with
having to remember six words the wordlist is much shorter.
The six word wordlist comes from the amazing [humanhash][humanhash]
library. Words were chosen to maximise clarity in human
communication.
>>> six = these.six_words(CERN)
>>> print six
'spaghetti-carolina-kentucky-oscar-iowa-table'
>>> these.decode(six)
(46.232335567474365, 6.055419445037842)
how it works
============
Each latitude/longitude pair is converted to a nine
character geohash. This provides about 3 meter
resolution at all latitudes. The geohash is then
converted to an integer which is encoded as a string
of words.
The wordlist used to encode the `geohash` into just
three words uses your local computer's dictionary. Some
attempts are made to remove really obscure words but
it could be better. You need to use the same wordlist
when encoding and decoding a `these-3-words` hash.
The `these-3-words` hash shares the
property of a `geohash` that nearby locations
have similar `these-3-words` hashes:
>>> other_CERN_site = (46.256811, 6.056792)
>>> six = these.six_words(other_CERN_site)
>>> print six
'spaghetti-carolina-kentucky-utah-seventeen-neptune'
>>> these.decode(six)
(46.256797313690186, 6.056792736053467)
The other CERN site is [here][othercernmap].
this is a [@betatim][betatim] kind of idea
[humanhash]: https://github.com/zacharyvoase/humanhash
[geohash]: https://code.google.com/p/python-geohash/
[cernmap]: https://www.google.ch/maps/place/46%C2%B013'56.4%22N+6%C2%B003'19.5%22E/@46.2323356,6.0554194,17z/data=!3m1!4b1!4m2!3m1!1s0x0:0x0
[othercernmap]: https://www.google.ch/maps/place/46%C2%B015'24.5%22N+6%C2%B003'24.4%22E/@46.256811,6.056792,14z/data=!4m2!3m1!1s0x0:0x0
[betatim]: https://twitter.com/betatim

177
thesethreewords.py Normal file
View File

@ -0,0 +1,177 @@
import random
import geohash
def get_random_words(path="/usr/share/dict/words"):
    """Return the shuffled list of 2**15 words used for three word hashes.

    The file at `path` is read line by line. Every entry that is between
    five and seven characters long (after stripping whitespace) is kept in
    lower case, the first 2**15 such entries are selected, and the selection
    is shuffled with a fixed seed so that the same input file always yields
    the same word order.

    `path` defaults to the system dictionary, so calling the function
    without arguments behaves as before.

    Raises AssertionError if the file holds fewer than 2**15 suitable words.
    """
    wanted = 2**15
    useful = []
    # The context manager closes the file even if reading fails half way.
    with open(path) as words:
        for w in words:
            w = w.strip()
            if 5 <= len(w) < 8:
                useful.append(w.lower())
    useful = useful[:wanted]
    # Raised explicitly (instead of a bare `assert`) so the check survives
    # `python -O`; a short list would otherwise only surface later as an
    # IndexError while encoding.
    if len(useful) != wanted:
        raise AssertionError(
            "%s provides only %i suitable words, %i are required"
            % (path, len(useful), wanted))
    # A private generator with the fixed seed produces the same order as
    # seeding the module-level generator, without resetting the random
    # state of every other user of `random` in the process.
    random.Random(3346346).shuffle(useful)
    return useful
# Shuffled list of 2**15 dictionary words, built once at import time.
# WordHasher indexes into it with 15 bit values to build three word hashes.
RANDOM_WORDLIST = get_random_words()
# Human friendly word list, taken directly from humanhash project.
# It holds 256 words, one for every possible byte value (0-255), and is
# indexed by WordHasher with single bytes to build six word hashes.
HUMAN_WORDLIST = (
'ack', 'alabama', 'alanine', 'alaska', 'alpha', 'angel', 'apart', 'april',
'arizona', 'arkansas', 'artist', 'asparagus', 'aspen', 'august', 'autumn',
'avocado', 'bacon', 'bakerloo', 'batman', 'beer', 'berlin', 'beryllium',
'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger',
'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat',
'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado',
'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware',
'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo',
'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal',
'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix',
'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie',
'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey',
'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel',
'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa',
'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter',
'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp',
'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana',
'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars',
'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike',
'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird',
'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska',
'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north',
'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange',
'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta',
'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple',
'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo',
'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen',
'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake',
'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring',
'stairway', 'steak', 'stream', 'summer', 'sweet', 'table', 'tango', 'ten',
'tennessee', 'tennis', 'texas', 'thirteen', 'three', 'timing', 'triple',
'twelve', 'twenty', 'two', 'uncle', 'undress', 'uniform', 'uranus', 'utah',
'vegan', 'venus', 'vermont', 'victor', 'video', 'violet', 'virginia',
'washington', 'west', 'whiskey', 'white', 'william', 'winner', 'winter',
'wisconsin', 'wolfram', 'wyoming', 'xray', 'yankee', 'yellow', 'zebra',
'zulu')
class WordHasher(object):
    """Convert latitudes and longitudes into human readable strings.

    A coordinate is first turned into a nine character geohash, which is
    read as a 45 bit base-32 integer. That integer is then spelled either
    as three words (three 15 bit chunks indexing RANDOM_WORDLIST) or as six
    words (three padding bits are appended and the resulting 48 bits are
    split into six bytes indexing HUMAN_WORDLIST).
    """

    def __init__(self):
        """Build the lookup tables for the geohash base-32 alphabet."""
        self._symbols = "0123456789bcdefghjkmnpqrstuvwxyz"
        # symbol -> value and value -> symbol
        self._decode_symbols = dict((ch, i) for (i, ch) in enumerate(self._symbols))
        self._encode_symbols = dict((i, ch) for (i, ch) in enumerate(self._symbols))

    def three_words(self, lat_long):
        """Convert coordinate to a combination of three words

        The coordinate is a `(latitude, longitude)` pair in degrees, passed
        as a single argument. It is unpacked in the body because tuple
        parameters in the signature are a syntax error on Python 3.

        Returns the three words joined by "-".
        """
        lat, lon = lat_long
        gh = geohash.encode(lat, lon, 9)
        rugbits = self.to_rugbits(self.geo_to_int(gh))
        return "-".join(RANDOM_WORDLIST[p] for p in rugbits)

    def six_words(self, lat_long):
        """Convert coordinate to a combination of six words

        The coordinate is a `(latitude, longitude)` pair in degrees, passed
        as a single argument.

        With six words the word list contains only words
        which are short, easy to pronounce and easy to distinguish.

        Returns the six words joined by "-".
        """
        lat, lon = lat_long
        gh = geohash.encode(lat, lon, 9)
        return "-".join(HUMAN_WORDLIST[p] for p in self.to_bytes(self.pad(gh)))

    def decode(self, words):
        """Decode words back to latitude and longitude

        `words` is a "-" separated string of three or six words as produced
        by `three_words` or `six_words`. The result is whatever
        `geohash.decode` returns for the recovered nine character geohash.

        Raises RuntimeError if the number of words is neither three nor six
        and ValueError if a word is not part of the matching word list.
        """
        words = words.split("-")
        if len(words) == 3:
            i = self.rugbits_to_int([RANDOM_WORDLIST.index(w) for w in words])
        elif len(words) == 6:
            i = self.bytes_to_int([HUMAN_WORDLIST.index(w) for w in words])
            i = self.unpad(i)
        else:
            raise RuntimeError("Do not know how to decode set of %i words."%(len(words)))
        # Ask for all nine characters: a geohash starting with the symbol
        # '0' has leading zero digits, which would otherwise be dropped and
        # decode to a completely different location.
        geo_hash = self.int_to_geo(i, 9)
        return geohash.decode(geo_hash)

    def geo_to_int(self, geo_hash):
        """Decode `geo_hash` to an integer

        The geohash is read as a big-endian base-32 number.
        Raises KeyError for characters outside the geohash alphabet.
        """
        base = len(self._symbols)
        number = 0
        for symbol in geo_hash:
            number = number*base + self._decode_symbols[symbol]
        return number

    def int_to_geo(self, integer, length=0):
        """Encode `integer` to a geo hash

        `length` is the minimum number of characters of the result; shorter
        encodings are left-padded with the zero symbol '0'. With the default
        of 0 no padding is applied, so an `integer` of zero (or less) yields
        an empty string.
        """
        base = len(self._symbols)
        symbols = []
        while integer > 0:
            integer, remainder = divmod(integer, base)
            symbols.append(self._encode_symbols[remainder])
        geo_hash = ''.join(reversed(symbols))
        return geo_hash.rjust(length, self._symbols[0])

    def pad(self, geo_hash):
        """Pad nine character `geo_hash` to 48bit integer"""
        assert len(geo_hash) == 9
        # multiplying by 8 appends three zero bits: 45 + 3 = 48
        return self.geo_to_int(geo_hash) * 8

    def unpad(self, integer):
        """Remove 3bit of padding to get 45bit geo hash"""
        return integer >> 3

    def to_bytes(self, integer):
        """Convert a 48bit `integer` to a list of 6bytes

        The most significant byte comes first. Shifts are used instead of
        `/` so that the arithmetic stays integral on Python 3 as well.
        """
        return [(integer >> shift) & 0b11111111 for shift in range(40, -1, -8)]

    def bytes_to_int(self, bytes):
        """Convert a list of 6`bytes` to an integer

        `bytes` holds six values in the range 0-255, most significant first.
        """
        assert len(bytes) == 6
        number = 0
        for b in bytes:
            number = (number << 8) | b
        return number

    def to_rugbits(self, integer):
        """Convert a 45bit `integer` to a list of 3rugbits

        A rugbit is like a byte but with 15bits instead of eight.
        The most significant rugbit comes first.
        """
        fifteen_bits = 0b111111111111111
        return [(integer >> 30) & fifteen_bits,
                (integer >> 15) & fifteen_bits,
                integer & fifteen_bits]

    def rugbits_to_int(self, rugbits):
        """Convert a list of `rugbits` to an integer"""
        return (rugbits[0] << 30) + (rugbits[1] << 15) + rugbits[2]
# Module-level convenience API: one shared WordHasher instance whose bound
# methods are exposed as plain functions, so callers can encode and decode
# without creating a hasher themselves.
DEFAULT_HASHER = WordHasher()
three_words = DEFAULT_HASHER.three_words
six_words = DEFAULT_HASHER.six_words
decode = DEFAULT_HASHER.decode