From e91356c0c1df44b6f4040ad512595f2ed620a9b0 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 17 Nov 2014 01:03:10 +0100 Subject: [PATCH] Initial commit --- README.md | 82 ++++++++++++++++++++- thesethreewords.py | 177 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 thesethreewords.py diff --git a/README.md b/README.md index 1afa949..040d682 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,86 @@ these-3-words ============= -Address any 3x3m square on earth with a unique three word name. +Address any 3x3 meter square on earth with a unique three word name. Inspired by http://what3words.com/ + + +example +======= + + >>> import thesethreewords as these + + # the home of particle physics + >>> CERN = (46.232355, 6.055419) + + >>> three = these.three_words(CERN) + >>> print three + engirt-aleutic-canun + >>> these.decode(three) + (46.232335567474365, 6.055419445037842) + +Check out where this is on [google maps][cernmap]. + + +requirements +============ + +You need to install the [geohash][geohash] library: + + $ pip install geohash + + +six words +========= + +There are a lot of 3x3m squares on the earth's surface. To encode +them in only three words requires a long wordlist; as a result, +some fairly obscure words end up on it. If you can live with +having to remember six words, the wordlist is much shorter. +The six word wordlist comes from the amazing [humanhash][humanhash] +library. Words were chosen to maximise clarity in human +communication. + + >>> six = these.six_words(CERN) + >>> print six + spaghetti-carolina-kentucky-oscar-iowa-table + >>> these.decode(six) + (46.232335567474365, 6.055419445037842) + + +how it works +============ + +Each latitude/longitude pair is converted to a nine +character geohash. This provides about 3 meter +resolution at all latitudes. The geohash is then +converted to an integer which is encoded as a string +of words. 
def get_random_words(path="/usr/share/dict/words"):
    """Build the shuffled 2**15-entry word list used for three-word hashes.

    The dictionary at `path` is read one word per line. A word is kept
    when, after stripping surrounding whitespace, it is between five and
    seven characters long. Kept words are lower-cased. The first 2**15
    usable words are then shuffled with a fixed seed, so the same
    dictionary always produces the same list.

    Two kinds of words are skipped because they cannot be decoded again:

    * words containing "-", which is the separator `decode` splits on;
    * words that are duplicates after lower-casing, because decoding
      looks a word up with ``.index()`` and would always find the first
      copy.

    Parameters
    ----------
    path : str
        Dictionary file to read, one word per line. Defaults to the
        system word list.

    Returns
    -------
    list of str
        Exactly 2**15 distinct lower-case words in shuffled order.

    Raises
    ------
    RuntimeError
        If the dictionary holds fewer than 2**15 usable words.
    """
    wanted = 2 ** 15
    seen = set()
    useful = []
    # `with` closes the dictionary even when reading fails part way.
    with open(path) as words:
        for line in words:
            word = line.strip()
            if not 5 <= len(word) < 8:
                continue
            word = word.lower()
            if "-" in word or word in seen:
                continue
            seen.add(word)
            useful.append(word)
            if len(useful) == wanted:
                break

    # An explicit check instead of `assert`, which is stripped under -O.
    if len(useful) != wanted:
        raise RuntimeError(
            "Need %i usable words in %s but found only %i."
            % (wanted, path, len(useful)))

    # A private generator seeded with the historic seed gives the same
    # shuffle as seeding the module-level generator, without resetting the
    # random state of every other user of `random` in the process.
    random.Random(3346346).shuffle(useful)
    return useful
'batman', 'beer', 'berlin', 'beryllium', + 'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger', + 'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat', + 'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado', + 'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware', + 'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo', + 'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal', + 'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix', + 'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie', + 'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey', + 'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel', + 'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa', + 'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter', + 'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp', + 'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana', + 'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars', + 'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike', + 'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird', + 'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska', + 'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north', + 'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange', + 'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta', + 'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple', + 'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo', + 'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen', + 'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake', + 'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring', + 'stairway', 'steak', 'stream', 'summer', 
class WordHasher(object):
    """Convert latitude/longitude pairs into word strings and back.

    A coordinate is first turned into a nine character geohash. The
    geohash alphabet has 32 symbols (5 bits each), so the hash is a 45 bit
    integer. That integer is then spelled either as three words (three
    15 bit indices into RANDOM_WORDLIST) or as six words (the integer
    padded to 48 bits and split into six byte-sized indices into
    HUMAN_WORDLIST).
    """

    # Every hash produced or consumed by this class has this many geohash
    # characters; `decode` needs it to restore leading "0" symbols.
    _GEOHASH_LENGTH = 9

    def __init__(self):
        """Set up the geohash alphabet and its two lookup tables."""
        self._symbols = "0123456789bcdefghjkmnpqrstuvwxyz"
        self._decode_symbols = dict(
            (ch, i) for (i, ch) in enumerate(self._symbols))
        self._encode_symbols = dict(
            (i, ch) for (i, ch) in enumerate(self._symbols))

    def three_words(self, lat_lon):
        """Convert coordinate to a combination of three words

        `lat_lon` is a ``(latitude, longitude)`` pair in degrees. It is
        accepted as one argument and unpacked here, because tuple
        parameters in the signature are a syntax error on Python 3.

        Returns the three words joined by "-".
        """
        lat, lon = lat_lon
        gh = geohash.encode(lat, lon, self._GEOHASH_LENGTH)
        rugbits = self.to_rugbits(self.geo_to_int(gh))
        return "-".join(RANDOM_WORDLIST[p] for p in rugbits)

    def six_words(self, lat_lon):
        """Convert coordinate to a combination of six words

        `lat_lon` is a ``(latitude, longitude)`` pair in degrees.

        With six words the word list contains only words
        which are short, easy to pronounce and easy to distinguish.

        Returns the six words joined by "-".
        """
        lat, lon = lat_lon
        gh = geohash.encode(lat, lon, self._GEOHASH_LENGTH)
        return "-".join(HUMAN_WORDLIST[p] for p in self.to_bytes(self.pad(gh)))

    def decode(self, words):
        """Decode words back to latitude and longitude

        `words` is a "-" separated string of three or six words as
        produced by `three_words` or `six_words`. The result is whatever
        ``geohash.decode`` returns for the recovered geohash.

        Raises RuntimeError when the number of words is neither three nor
        six, and ValueError (from ``.index``) when a word is not in the
        word list.
        """
        words = words.split("-")
        if len(words) == 3:
            i = self.rugbits_to_int([RANDOM_WORDLIST.index(w) for w in words])

        elif len(words) == 6:
            i = self.bytes_to_int([HUMAN_WORDLIST.index(w) for w in words])
            i = self.unpad(i)

        else:
            raise RuntimeError("Do not know how to decode set of %i words."%(len(words)))

        # Ask for the full width: without it a hash starting with "0"
        # would come back shorter and decode to a different location.
        geo_hash = self.int_to_geo(i, self._GEOHASH_LENGTH)
        return geohash.decode(geo_hash)

    def geo_to_int(self, geo_hash):
        """Decode `geo_hash` to an integer

        The hash is read as a base 32 number, most significant symbol
        first. Raises KeyError for a character outside the alphabet.
        """
        base = len(self._symbols)
        number = 0
        for symbol in geo_hash:
            number = number * base + self._decode_symbols[symbol]

        return number

    def int_to_geo(self, integer, length=None):
        """Encode `integer` to a geo hash

        Without `length` the shortest representation is returned (leading
        "0" symbols are dropped and 0 gives an empty string), as before.
        With `length` the result is left-padded with the zero symbol to at
        least that many characters, so that
        ``int_to_geo(geo_to_int(h), len(h)) == h`` holds for every hash.
        """
        base = len(self._symbols)
        symbols = []
        while integer > 0:
            integer, remainder = divmod(integer, base)
            symbols.append(self._encode_symbols[remainder])

        geo_hash = "".join(reversed(symbols))
        if length is None:
            return geo_hash
        return geo_hash.rjust(length, self._symbols[0])

    def pad(self, geo_hash):
        """Pad nine character `geo_hash` to 48bit integer

        Three zero bits are appended at the low end. Raises ValueError
        when `geo_hash` is not nine characters long.
        """
        if len(geo_hash) != self._GEOHASH_LENGTH:
            raise ValueError(
                "Expected a geo hash of %i characters, got %i."
                % (self._GEOHASH_LENGTH, len(geo_hash)))
        return self.geo_to_int(geo_hash) << 3

    def unpad(self, integer):
        """Remove 3bit of padding to get 45bit geo hash"""
        return integer >> 3

    def to_bytes(self, integer):
        """Convert a 48bit `integer` to a list of 6bytes

        The most significant byte comes first. Shifts are used rather than
        `/`, which yields a float on Python 3.
        """
        return [(integer >> shift) & 0xFF for shift in range(40, -8, -8)]

    def bytes_to_int(self, bytes):
        """Convert a list of 6`bytes` to an integer

        The first entry is the most significant byte. Raises ValueError
        when the list does not hold exactly six entries.
        """
        if len(bytes) != 6:
            raise ValueError("Expected 6 bytes, got %i." % len(bytes))
        integer = 0
        for b in bytes:
            integer = (integer << 8) | b
        return integer

    def to_rugbits(self, integer):
        """Convert a 45bit `integer` to a list of 3rugbits

        A rugbit is like a byte but with 15bits instead of eight.
        The most significant rugbit comes first.
        """
        fifteen_bits = 0b111111111111111
        return [(integer >> 30) & fifteen_bits,
                (integer >> 15) & fifteen_bits,
                integer & fifteen_bits]

    def rugbits_to_int(self, rugbits):
        """Convert a list of `rugbits` to an integer"""
        return (rugbits[0] << 30) + (rugbits[1] << 15) + rugbits[2]


DEFAULT_HASHER = WordHasher()
three_words = DEFAULT_HASHER.three_words
six_words = DEFAULT_HASHER.six_words
decode = DEFAULT_HASHER.decode