Initial commit
This commit is contained in:
parent
bb3591fa17
commit
e91356c0c1
82
README.md
82
README.md
|
@ -1,6 +1,86 @@
|
|||
these-3-words
|
||||
=============
|
||||
|
||||
Address any 3x3m square on earth with a unique three word name.
|
||||
Address any 3x3meter square on earth with a unique three word name.
|
||||
|
||||
Inspired by http://what3words.com/
|
||||
|
||||
|
||||
example
|
||||
=======
|
||||
|
||||
>>> import thesethreewords as these
|
||||
|
||||
# the home of particle physics
|
||||
>>> CERN = (46.232355, 6.055419)
|
||||
|
||||
>>> three = these.three_words(CERN)
|
||||
>>> print three
|
||||
'engirt-aleutic-canun'
|
||||
>>> these.decode(three)
|
||||
(46.232335567474365, 6.055419445037842)
|
||||
|
||||
Check out where this is on [google maps][cernmap].
|
||||
|
||||
|
||||
requirements
|
||||
============
|
||||
|
||||
You need to install the [geohash][geohash] library:
|
||||
|
||||
$ pip install geohash
|
||||
|
||||
|
||||
six words
|
||||
=========
|
||||
|
||||
There are a lot of 3x3m squares on the earth's surface. To encode
|
||||
them in only three words requires a long wordlist, as a result
|
||||
some fairly obscure words get on it. If you can live with
|
||||
having to remember six words the wordlist is much shorter.
|
||||
The six word wordlist comes from the amazing [humanhash][humanhash]
|
||||
library. Words were chosen to maximise clarity in human
|
||||
communication.
|
||||
|
||||
>>> six = these.six_words(CERN)
|
||||
>>> print six
|
||||
'spaghetti-carolina-kentucky-oscar-iowa-table'
|
||||
>>> these.decode(six)
|
||||
(46.232335567474365, 6.055419445037842)
|
||||
|
||||
|
||||
how it works
|
||||
============
|
||||
|
||||
Each latitude/longitude pair is converted to a nine
|
||||
character geohash. This provides about 3 meter
|
||||
resolution at all latitudes. The geohash is then
|
||||
converted to an integer which is encoded as a string
|
||||
of words.
|
||||
|
||||
The wordlist used to encode the `geohash` into just
|
||||
three words uses your local computer's dictionary. Some
|
||||
attempts are made to remove really obscure words but
|
||||
it could be better. You need to use the same wordlist
|
||||
when encoding and decoding a `these-3-words` hash.
|
||||
|
||||
The `these-3-words` hash shares the
|
||||
property of a `geohash` that nearby locations
|
||||
have similar `these-3-words` hashes
|
||||
|
||||
>>> other_CERN_site = (46.256811, 6.056792)
|
||||
>>> six = these.six_words(other_CERN_site)
|
||||
>>> print six
|
||||
'spaghetti-carolina-kentucky-utah-seventeen-neptune'
|
||||
>>> these.decode(six)
|
||||
(46.256797313690186, 6.056792736053467)
|
||||
|
||||
The other CERN site is [here][othercernmap].
|
||||
|
||||
this is a [@betatim][betatim] kind of idea
|
||||
|
||||
[humanhash]: https://github.com/zacharyvoase/humanhash
|
||||
[geohash]: https://code.google.com/p/python-geohash/
|
||||
[cernmap]: https://www.google.ch/maps/place/46%C2%B013'56.4%22N+6%C2%B003'19.5%22E/@46.2323356,6.0554194,17z/data=!3m1!4b1!4m2!3m1!1s0x0:0x0
|
||||
[othercernmap]: https://www.google.ch/maps/place/46%C2%B015'24.5%22N+6%C2%B003'24.4%22E/@46.256811,6.056792,14z/data=!4m2!3m1!1s0x0:0x0
|
||||
[betatim]: https://twitter.com/betatim
|
|
@ -0,0 +1,177 @@
|
|||
import random
|
||||
|
||||
import geohash
|
||||
|
||||
|
||||
def get_random_words(path="/usr/share/dict/words", size=2**15):
    """Build the shuffled word list used for three-word hashes.

    Reads one word per line from `path`, keeps the words that are five
    to seven characters long once surrounding whitespace is stripped,
    lower-cases them, keeps the first `size` candidates and shuffles
    them with a fixed seed so the order is reproducible for a given
    dictionary file.

    Parameters:
        path: dictionary file holding one word per line.
        size: number of words required. The default of 2**15 gives one
            word for every possible 15 bit value.

    Returns:
        A list of exactly `size` lower-case words.

    Raises:
        RuntimeError: if the dictionary yields fewer than `size` words.
    """
    # NOTE: this reseeds the module-level `random` generator, so it also
    # affects every later use of `random` in the importing process.
    random.seed(3346346)

    useful = []
    # `with` closes the file even when reading raises part way through.
    with open(path) as words:
        for w in words:
            w = w.strip()
            if 5 <= len(w) < 8:
                useful.append(w.lower())

    # NOTE(review): lower-casing can presumably produce duplicates when the
    # dictionary lists both a capitalised and a lower-case form of a word;
    # a duplicated word can only decode to its first index -- confirm
    # against the dictionary in use.
    useful = useful[:size]
    random.shuffle(useful)

    # Explicit check instead of `assert`, which is stripped under `-O`; a
    # short list would otherwise surface later as an IndexError.
    if len(useful) != size:
        raise RuntimeError("Expected %i words of 5 to 7 characters in %s, "
                           "found only %i." % (size, path, len(useful)))
    return useful
|
||||
# Word list used for three-word hashes. It is built once, when this module
# is imported, by reading the dictionary file in get_random_words().
RANDOM_WORDLIST = get_random_words()
|
||||
|
||||
# Human friendly word list, taken directly from humanhash project.
# It holds 256 words, one for every possible byte value, so the position
# of a word in the tuple is the byte it stands for.
HUMAN_WORDLIST = tuple("""
    ack alabama alanine alaska alpha angel apart april
    arizona arkansas artist asparagus aspen august autumn
    avocado bacon bakerloo batman beer berlin beryllium
    black blossom blue bluebird bravo bulldog burger
    butter california carbon cardinal carolina carpet cat
    ceiling charlie chicken coffee cola cold colorado
    comet connecticut crazy cup dakota december delaware
    delta diet don double early earth east echo
    edward eight eighteen eleven emma enemy equal
    failed fanta fifteen fillet finch fish five fix
    floor florida football four fourteen foxtrot freddie
    friend fruit gee georgia glucose golf green grey
    hamper happy harry hawaii helium high hot hotel
    hydrogen idaho illinois india indigo ink iowa
    island item jersey jig johnny juliet july jupiter
    kansas kentucky kilo king kitten lactose lake lamp
    lemon leopard lima lion lithium london louisiana
    low magazine magnesium maine mango march mars
    maryland massachusetts may mexico michigan mike
    minnesota mirror mississippi missouri mobile mockingbird
    monkey montana moon mountain muppet music nebraska
    neptune network nevada nine nineteen nitrogen north
    november nuts october ohio oklahoma one orange
    oranges oregon oscar oven oxygen papa paris pasta
    pennsylvania pip pizza pluto potato princess purple
    quebec queen quiet red river robert robin romeo
    rugby sad salami saturn september seven seventeen
    shade sierra single sink six sixteen skylark snake
    social sodium solar south spaghetti speaker spring
    stairway steak stream summer sweet table tango ten
    tennessee tennis texas thirteen three timing triple
    twelve twenty two uncle undress uniform uranus utah
    vegan venus vermont victor video violet virginia
    washington west whiskey white william winner winter
    wisconsin wolfram wyoming xray yankee yellow zebra
    zulu
""".split())
|
||||
|
||||
|
||||
class WordHasher(object):
    """Convert latitude and longitudes into human readable strings.

    A coordinate is turned into a nine character geohash, the geohash is
    read as a base 32 number (45 bits) and that number is spelled out
    either as three words from RANDOM_WORDLIST (15 bits per word) or as
    six words from HUMAN_WORDLIST (8 bits per word, after padding the
    number to 48 bits). `decode` reverses either form.
    """

    def __init__(self):
        """Set up the geohash alphabet and its two lookup tables."""
        self._symbols = "0123456789bcdefghjkmnpqrstuvwxyz"
        # symbol -> digit value, used by geo_to_int
        self._decode_symbols = dict((ch, i) for (i, ch) in enumerate(self._symbols))
        # digit value -> symbol, used by int_to_geo
        self._encode_symbols = dict((i, ch) for (i, ch) in enumerate(self._symbols))

    def three_words(self, lat_long):
        """Convert coordinate to a combination of three words

        The coordinate is a `(latitude, longitude)` pair in degrees.
        Returns the three words joined by "-".
        """
        # Unpacked in the body: tuple parameters in the signature are a
        # syntax error on Python 3. Callers still pass a single pair.
        lat, lon = lat_long
        gh = geohash.encode(lat, lon, 9)
        rugbits = self.to_rugbits(self.geo_to_int(gh))
        return "-".join(RANDOM_WORDLIST[p] for p in rugbits)

    def six_words(self, lat_long):
        """Convert coordinate to a combination of six words

        The coordinate is a `(latitude, longitude)` pair in degrees.
        Returns the six words joined by "-".

        With six words the word list contains only words
        which are short, easy to pronounce and easy to distinguish.
        """
        lat, lon = lat_long
        gh = geohash.encode(lat, lon, 9)
        byte_values = self.to_bytes(self.pad(gh))
        return "-".join(HUMAN_WORDLIST[p] for p in byte_values)

    def decode(self, words):
        """Decode words back to latitude and longitude

        `words` is a "-" separated string of three or six words as
        produced by `three_words` or `six_words`. Returns whatever
        `geohash.decode` returns for the recovered geohash.

        Raises RuntimeError when the number of words is neither three
        nor six, and ValueError (from the `index` lookup) when a word is
        not part of the matching word list.
        """
        words = words.split("-")
        if len(words) == 3:
            i = self.rugbits_to_int([RANDOM_WORDLIST.index(w) for w in words])

        elif len(words) == 6:
            i = self.bytes_to_int([HUMAN_WORDLIST.index(w) for w in words])
            i = self.unpad(i)

        else:
            raise RuntimeError("Do not know how to decode set of %i words."%(len(words)))

        geo_hash = self.int_to_geo(i)
        return geohash.decode(geo_hash)

    def geo_to_int(self, geo_hash):
        """Decode `geo_hash` to an integer

        Each symbol is one base 32 digit, most significant first.
        Raises KeyError for a character outside the geohash alphabet.
        """
        base = len(self._symbols)
        number = 0
        for symbol in geo_hash:
            number = number*base + self._decode_symbols[symbol]

        return number

    def int_to_geo(self, integer, length=9):
        """Encode `integer` to a geo hash

        The result is left-padded with the zero symbol to at least
        `length` characters. Without the padding a geohash that starts
        with "0" would come back shorter than it went in and would
        therefore decode to a different location.
        """
        base = len(self._symbols)
        symbols = []
        while integer > 0:
            integer, remainder = divmod(integer, base)
            symbols.append(self._encode_symbols[remainder])

        # `symbols` is least significant first, so appending zero symbols
        # here puts them at the front once the list is reversed.
        symbols.extend(self._symbols[0] * (length - len(symbols)))
        return ''.join(reversed(symbols))

    def pad(self, geo_hash):
        """Pad nine character `geo_hash` to 48bit integer"""
        assert len(geo_hash) == 9
        # Three zero bits are appended at the low end: 45 + 3 = 48.
        return self.geo_to_int(geo_hash) * 8

    def unpad(self, integer):
        """Remove 3bit of padding to get 45bit geo hash"""
        return integer>>3

    def to_bytes(self, integer):
        """Convert a 48bit `integer` to a list of 6bytes

        The most significant byte comes first.
        """
        # Shifts keep the arithmetic in integers on Python 2 and 3 alike;
        # `/` would produce a float on Python 3.
        return [(integer >> shift) & 0b11111111 for shift in range(40, -8, -8)]

    def bytes_to_int(self, bytes):
        """Convert a list of 6`bytes` to an integer

        The first entry is the most significant byte. Every entry is
        expected to be in the range 0..255.
        """
        # The parameter keeps its original name, `bytes`, for callers
        # that pass it by keyword.
        assert len(bytes) == 6
        number = 0
        for b in bytes:
            number = (number << 8) | b
        return number

    def to_rugbits(self, integer):
        """Convert a 45bit `integer` to a list of 3rugbits

        A rugbit is like a byte but with 15bits instead of eight.
        The most significant rugbit comes first.
        """
        fifteen_bits = 0b111111111111111
        rugbits = [(integer >> 30) & fifteen_bits,
                   (integer >> 15) & fifteen_bits,
                   integer & fifteen_bits]
        return rugbits

    def rugbits_to_int(self, rugbits):
        """Convert a list of `rugbits` to an integer"""
        return (rugbits[0] *(2**30)) + (rugbits[1] *(2**15)) + (rugbits[2])
||||
|
||||
|
||||
# Module-level convenience API: one shared WordHasher instance whose bound
# methods are exposed as plain functions.
DEFAULT_HASHER = WordHasher()
three_words = DEFAULT_HASHER.three_words
six_words = DEFAULT_HASHER.six_words
decode = DEFAULT_HASHER.decode
|
Loading…
Reference in New Issue