diff --git a/.gitignore b/.gitignore index db4561e..8a341b0 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ docs/_build/ # PyBuilder target/ + +# google ngram input files +googlebooks-eng-all-1gram-20120701* diff --git a/README.md b/README.md index f34e9f0..fb07019 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ example >>> three = these.three_words(CERN) >>> print three - 'engirt-aleutic-canun' + 'treaty-crane-seldes' >>> these.decode(three) (46.232335567474365, 6.055419445037842) @@ -74,7 +74,7 @@ have similar `these-3-words` hashes >>> other_CERN_site = (46.256811, 6.056792) >>> six = these.six_words(other_CERN_site) >>> print six - ''spaghetti-carolina-kentucky-utah-seventeen-neptune' + 'spaghetti-carolina-kentucky-utah-seventeen-neptune' >>> these.decode(six) (46.256797313690186, 6.056792736053467) diff --git a/thesethreewords.py b/thesethreewords.py index 1009266..e95e9d6 100644 --- a/thesethreewords.py +++ b/thesethreewords.py @@ -8,21 +8,18 @@ import random import geohash -def get_random_words(): - words = open("/usr/share/dict/words") - random.seed(3346346) - useful = [] - for w in words: - w = w.strip() - if 5 <= len(w) < 8: - useful.append(w.lower()) - - words.close() - useful = useful[:2**15] - random.shuffle(useful) - assert len(useful) == 2**15 - return useful -RANDOM_WORDLIST = get_random_words() +def get_google_words(): + lines = open("words/google-ngram-list") + words = [] + for line in lines: + _, word = line.split() + words.append(word) + + lines.close() + random.seed(634634) + random.shuffle(words) + return words +GOOGLE_WORDLIST = get_google_words() # Human friendly word list, taken directly from humanhash project HUMAN_WORDLIST = ( @@ -79,7 +76,7 @@ class WordHasher(object): in degrees. """ gh = geohash.encode(lat, lon, 9) - words = "-".join(RANDOM_WORDLIST[p] for p in self.to_rugbits(self.geo_to_int(gh))) + words = "-".join(GOOGLE_WORDLIST[p] for p in self.to_rugbits(self.geo_to_int(gh))) return words def six_words(self, (lat, lon)): @@ -99,7 +96,7 @@ class WordHasher(object): """Decode words back to latitude and longitude""" words = words.split("-") if len(words) == 3: - i = self.rugbits_to_int([RANDOM_WORDLIST.index(w) for w in words]) + i = self.rugbits_to_int([GOOGLE_WORDLIST.index(w) for w in words]) elif len(words) == 6: i = self.bytes_to_int([HUMAN_WORDLIST.index(w) for w in words]) diff --git a/views/index.html b/views/index.html index 04fb4a8..9a0c195 100644 --- a/views/index.html +++ b/views/index.html @@ -19,7 +19,7 @@ Find a location anywhere in the world identified by three simple words.

- +

@@ -27,7 +27,7 @@