You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

179 lines
6.0 KiB
Python

BASE_32 = '0123456789bcdefghjkmnpqrstuvwxyz'
class Geohash(object):
'''
Simplistic implementation of a hash function that encodes coordinate pairs
to a BASE32 string.
Coordinates are interleaved, meaning that the BASE32 encoding can be
truncated and still give valid coordinates, but with less spatial accuracy.
Inspired by geohash.org; Ported implementation from https://developer-should-know.tumblr.com/post/87283491372/geohash-encoding-and-decoding-algorithm
'''
NUMERIC = 'numeric_lsb'
NUMERIC_MSB = 'numeric_msb'
BASE32 = 'base32'
BITSTRING = 'bitstring'
LAT_RANGE = (-90.0, 90.0)
LON_RANGE = (-180.0, 180.0)
def divideRangeByValue(value, valRange):
'''
Splits `valRange` in half (in place) so that `value` is within that range.
Returns 1 if the upper half was chosen, 0 for the lower half.
'''
mid = Geohash.middle(valRange)
if value >= mid:
valRange[0] = mid
return 1
else:
valRange[1] = mid
return 0
def divideRangeByBit(bit, valRange):
mid = Geohash.middle(valRange)
if bit > 0:
valRange[0] = mid
else:
valRange[1] = mid
def middle(valRange):
return (valRange[0] + valRange[1]) / 2.0
def intToBitstring(val, precision):
''' converts an LSB-first integer to a MSB-first bitstring
'''
result = ''
while val:
result += str(1 & val)
val >>= 1
# if the last bits are all zero, the loop stops too early; append missing zeros
missing = precision - len(result)
if missing:
result += ''.join(['0'] * missing)
return result
def intToBase32(val):
''' convert an LSB-first integer to base32 encoding
'''
result = ''
bitmask = 0b11111 # (one base32 char encodes 5 bit)
i = 0
while val:
masked = (bitmask & val) >> (5 * i) # mask 5 bits of the hash
val &= ~bitmask # zero out the processed bits (loop termination)
bitmask <<= 5 # mask the next 5 bits
i += 1
base32Index = 0 # reverse bits in groups of 5, as they are LSB-first
for k in range(5):
bitK = (masked & (1 << k)) >> k
base32Index |= bitK << (4 - k)
result += BASE_32[base32Index] # convert to BASE32
return result
def base32ToInt(string):
''' convert a base32 string to LSB-first integer
'''
bitsDecoded = 0
result = 0
for char in string:
base32CharIndex = BASE_32.index(char)
for i in [4,3,2,1,0]: # 5 bits per BASE32 character, start with MSB
bit = (base32CharIndex & (1 << i)) >> i # extract bit
bit <<= bitsDecoded
result |= bit
bitsDecoded += 1
return result
def encodeBits(lat, lon, precision, lsb=True):
'''
encodes a coordinate pair into `precision` interleaved bits
returns a LSB-first integer.
max precision is 128 bits, afterwards we're overflowing
'''
lonRange = list(Geohash.LON_RANGE) # we modify the range, so make a copy
latRange = list(Geohash.LAT_RANGE)
bitsEncoded = 0
geohash = 0
while bitsEncoded < precision:
if bitsEncoded % 2 == 0: # even bit
bit = Geohash.divideRangeByValue(lon, lonRange)
else:
bit = Geohash.divideRangeByValue(lat, latRange)
if lsb:
geohash |= bit << bitsEncoded # LSB first
else:
geohash = (geohash << 1) | bit # MSB first
bitsEncoded += 1
return geohash
def decodeBits(geohash, precision=None):
lonRange = list(Geohash.LON_RANGE) # we modify the range, so make a copy
latRange = list(Geohash.LAT_RANGE)
bitsDecoded = 0
# FIXME problem: we don't know when all bits are processed; we are
# stopping when no more bits are set, and we processed an even count.
# precision could actually be higher, when the remaining bits are all 0,
# but we don't know about that... with base32 we also have that issue (I
# think?), even if the blocksize (5) is known
while geohash or bitsDecoded % 2 != 0:
if precision and bitsDecoded >= precision:
break
bit = geohash & 1 # extract bit
geohash &= ~1 # clear bit
geohash >>= 1 # go for the next bit
if bitsDecoded % 2 == 0:
Geohash.divideRangeByBit(bit, lonRange)
else:
Geohash.divideRangeByBit(bit, latRange)
bitsDecoded += 1
# TODO: allow to return either point or range
return [Geohash.middle(latRange), Geohash.middle(lonRange)]
def decode(geohash, precision = None):
if type(geohash) == str:
precision = len(geohash) * 5
geohash = Geohash.base32ToInt(geohash)
return Geohash.decodeBits(geohash, precision)
def encodePoint(lat, lon, precision, output=NUMERIC):
if output == Geohash.BASE32:
precision *= 5 # one char encodes 5 bit
if output == Geohash.NUMERIC_MSB:
hashNumeric = Geohash.encodeBits(lat, lon, precision, lsb=False)
else:
hashNumeric = Geohash.encodeBits(lat, lon, precision)
if output == Geohash.BASE32:
return Geohash.intToBase32(hashNumeric)
elif output == Geohash.BITSTRING:
return Geohash.intToBitstring(hashNumeric, precision)
else:
return hashNumeric
def encodeRange(xRange, yRange, format=NUMERIC):
# TODO
pass
def test():
assert "u4pruydqqvj8" == Geohash.encodePoint(57.64911, 10.40744, 12, Geohash.BASE32)
print(Geohash.decode("u4pruydqqvj8"))