Geohash: Difference between revisions
(add decoder) |
|||
Line 172: | Line 172: | ||
function decoder(geo) |
function decoder(geo) |
||
minmaxes, latlong = [[-90.0, 90.0], [-180.0, 180.0]], 2 |
minmaxes, latlong = [[-90.0, 90.0], [-180.0, 180.0]], 2 |
||
for |
for c in [ch for ch in geo], bit in ch2bool[c] |
||
minmaxes[latlong][bit == '1' ? 1 : 2] = sum(minmaxes[latlong]) / 2 |
minmaxes[latlong][bit == '1' ? 1 : 2] = sum(minmaxes[latlong]) / 2 |
||
latlong = 3 - latlong |
latlong = 3 - latlong |
Revision as of 04:57, 21 June 2020
Geohashes are used to represent standard latitude and longitude coordinates as single values in the form of a simple string -- using the digits (0-9) and the letters (B-Z excluding I, L, O). They can vary in length, with more characters in the string representing more precision.
- Task
Generate a Geohash with a desired precision from a coordinate represented as an array of two floating point numbers, latitude and longitude. (Ideally double precision).
- Example 1:
- print (encodeGeohash (for: [51.433718, -0.214126], withPrecision: 2))
- // Result: "gc" (all of Ireland, most of England and Wales, small part of Scotland)
- Example 2:
- print (encodeGeohash (for: [51.433718, -0.214126], withPrecision: 9))
- // Result: "gcpue5hp4" (the umpire's chair on Center Court at Wimbledon)
From the Wikipedia page, geohashes can be "useful in database systems where queries on a single index are much easier or faster than multiple-index queries."
- Reference
Factor
Factor comes with the geohash
vocabulary. See the implementation here.
<lang factor>USING: formatting generalizations geohash io kernel sequences ;
- encode-geohash ( latitude longitude precision -- str )
[ >geohash ] [ head ] bi* ;
! Encoding 51.433718 -0.214126 2 51.433718 -0.214126 9 57.649110 10.407440 11 [
3dup encode-geohash "geohash for [%f, %f], precision %2d = %s\n" printf
] 3 3 mnapply nl
! Decoding "u4pruydqqvj" dup geohash> "coordinates for %s ~= [%f, %f]\n" printf</lang>
- Output:
geohash for [51.433718, -0.214126], precision 2 = gc geohash for [51.433718, -0.214126], precision 9 = gcpue5hp4 geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj coordinates for u4pruydqqvj ~= [57.649110, 10.407439]
Go
<lang go>package main
import (
"fmt" "strings"
)
type Location struct{ lat, lng float64 }
func (loc Location) String() string { return fmt.Sprintf("[%f, %f]", loc.lat, loc.lng) }
type Range struct{ lower, upper float64 }
var gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"
func encodeGeohash(loc Location, prec int) string {
latRange := Range{-90, 90} lngRange := Range{-180, 180} var hash strings.Builder hashVal := 0 bits := 0 even := true for hash.Len() < prec { val := loc.lat rng := latRange if even { val = loc.lng rng = lngRange } mid := (rng.lower + rng.upper) / 2 if val > mid { hashVal = (hashVal << 1) + 1 rng = Range{mid, rng.upper} if even { lngRange = Range{mid, lngRange.upper} } else { latRange = Range{mid, latRange.upper} } } else { hashVal <<= 1 if even { lngRange = Range{lngRange.lower, mid} } else { latRange = Range{latRange.lower, mid} } } even = !even if bits < 4 { bits++ } else { bits = 0 hash.WriteByte(gBase32[hashVal]) hashVal = 0 } } return hash.String()
}
func main() {
locs := []Location{ {51.433718, -0.214126}, {51.433718, -0.214126}, {57.64911, 10.40744}, } precs := []int{2, 9, 11}
for i, loc := range locs { geohash := encodeGeohash(loc, precs[i]) fmt.Printf("geohash for %v, precision %-2d = %s\n", loc, precs[i], geohash) }
}</lang>
- Output:
geohash for [51.433718, -0.214126], precision 2 = gc geohash for [51.433718, -0.214126], precision 9 = gcpue5hp4 geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj
Julia
<lang julia>const ch32 = "0123456789bcdefghjkmnpqrstuvwxyz" const bool2ch = Dict(string(i-1, base=2, pad=5) => ch for (i, ch) in enumerate(ch32)) const ch2bool = Dict(v => k for (k, v) in bool2ch)
function bisect(val, mn, mx, bits)
mid = (mn + mx) / 2 if val < mid bits <<= 1 # push 0 mx = mid # range lower half else bits = (bits << 1) | 1 # push 1 mn = mid # range upper half end return mn, mx, bits
end
function encoder(lat, lng, pre)
latmin, latmax = -90, 90 lngmin, lngmax = -180, 180 bits = Int128(0) for i in 0:5*pre-1 if i % 2 != 0 # odd bit: bisect latitude latmin, latmax, bits = bisect(lat, latmin, latmax, bits) else # even bit: bisect longitude lngmin, lngmax, bits = bisect(lng, lngmin, lngmax, bits) end end # Bits to characters b = string(bits, base=2, pad=5*pre) geo = [bool2ch[b[i*5+1:i*5+5]] for i in 0:pre-1] return prod(geo)
end
function decoder(geo)
minmaxes, latlong = [[-90.0, 90.0], [-180.0, 180.0]], 2 for c in [ch for ch in geo], bit in ch2bool[c] minmaxes[latlong][bit == '1' ? 1 : 2] = sum(minmaxes[latlong]) / 2 latlong = 3 - latlong end return minmaxes
end
for ((lat, lng), pre) in [([51.433718, -0.214126], 2),
([51.433718, -0.214126], 9), ([57.64911, 10.40744] , 11), ([57.64911, 10.40744] , 22)] encoded = encoder(lat, lng, pre) println("encoder(lat=$lat, lng=$lng, pre=$pre) = ", encoded) println("decoded = ", decoder(encoded))
end
</lang>
- Output:
encoder(lat=51.433718, lng=-0.214126, pre=2) = gc decoded = [[50.625, 56.25], [-11.25, 0.0]] encoder(lat=51.433718, lng=-0.214126, pre=9) = gcpue5hp4 decoded = [[51.43369674682617, 51.43373966217041], [-0.21414756774902344, -0.21410465240478516]] encoder(lat=57.64911, lng=10.40744, pre=11) = u4pruydqqvj decoded = [[57.649109959602356, 57.64911130070686], [10.407439023256302, 10.40744036436081]] encoder(lat=57.64911, lng=10.40744, pre=22) = u4pruydqqvj8pr9yc27rjr decoded = [[57.64911, 57.64911000000001], [10.407439999999998, 10.407440000000008]]
Phix
<lang Phix>constant gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"
function encode_geohash(sequence location, integer precision)
sequence r = {{-90,90},{-180,180}} -- lat/long integer ll = 2, -- " " " hashval = 0, bits = 0 string hash = "" while length(hash) < precision do atom mid = sum(r[ll])/2, gt = location[ll]>mid hashval = hashval*2+gt r[ll][2-gt] = mid bits += 1 if bits=5 then hash &= gBase32[hashval+1] {hashval,bits} = {0,0} end if ll = 3-ll -- (1 <==> 2) end while return hash
end function
function decode_geohash(string hash) -- output is {{lat_lo,lat_hi},{long_lo,long_hi}}
sequence r = {{-90,90},{-180,180}} -- lat/long integer ll = 2 -- " " " for h=1 to length(hash) do string b = sprintf("%05b",find(hash[h],gBase32)-1) for it=1 to 5 do r[ll][2-(b[it]='1')] = sum(r[ll])/2 ll = 3-ll -- (1 <==> 2) end for end for return r
end function
sequence tests = {{{51.433718, -0.214126}, 2},
{{51.433718, -0.214126}, 9}, {{57.64911, 10.40744 }, 11}, {{57.64911, 10.40744 }, 22}}
for i=1 to length(tests) do
{sequence location, integer precision} = tests[i] string geohash = encode_geohash(location, precision) printf(1,"geohash for %v, precision %d = %s\n",{location, precision, geohash}) tests[i] = geohash
end for
printf(1,"\ndecode tests:\n") tests = append(tests,"ezs42") for i=1 to length(tests) do
printf(1,"%-22s ==> %v\n",{tests[i],decode_geohash(tests[i])})
end for</lang>
- Output:
geohash for {51.433718,-0.214126}, precision 2 = gc geohash for {51.433718,-0.214126}, precision 9 = gcpue5hp4 geohash for {57.64911,10.40744}, precision 11 = u4pruydqqvj geohash for {57.64911,10.40744}, precision 22 = u4pruydqqvj8pr9yc27rjr decode tests: gc ==> {{50.625,56.25},{-11.25,0}} gcpue5hp4 ==> {{51.43369675,51.43373966},{-0.2141475677,-0.2141046524}} u4pruydqqvj ==> {{57.64910996,57.6491113},{10.40743902,10.40744036}} u4pruydqqvj8pr9yc27rjr ==> {{57.64911,57.64911},{10.40744,10.40744}} ezs42 ==> {{42.58300781,42.62695312},{-5.625,-5.581054688}}
Not surprisingly, "gc" is not even accurate to one significant digit, but a precision of 9 is accurate to 5 or 6 significant digits, 11 to 6 or 7 digits, and 22 exceeds the natural 10 sig digs of %v. Note that 32-bit gives a last character of 'q' for the precision 22 test, for obvious reasons, the above results are from using the 64-bit interpreter.
Python
<lang python>ch32 = "0123456789bcdefghjkmnpqrstuvwxyz" bool2ch = {f"{i:05b}": ch for i, ch in enumerate(ch32)}
def bisect(val, mn, mx, bits):
mid = (mn + mx) / 2 if val < mid: bits <<= 1 # push 0 mx = mid # range lower half else: bits = bits << 1 | 1 # push 1 mn = mid # range upper half
return mn, mx, bits
def encoder(lat, lng, pre):
latmin, latmax = -90, 90 lngmin, lngmax = -180, 180 bits = 0 for i in range(pre * 5): if i % 2: # odd bit: bisect latitude latmin, latmax, bits = bisect(lat, latmin, latmax, bits) else: # even bit: bisect longitude lngmin, lngmax, bits = bisect(lng, lngmin, lngmax, bits) # Bits to characters b = f"{bits:0{pre * 5}b}" geo = (bool2ch[b[i*5: (i+1)*5]] for i in range(pre))
return .join(geo)
if __name__ == '__main__':
for (lat, lng), pre in [([51.433718, -0.214126], 2), ([51.433718, -0.214126], 9), ([57.64911, 10.40744] , 11), ([57.64911, 10.40744] , 22)]: print("encoder(lat=%f, lng=%f, pre=%i) = %r" % (lat, lng, pre, encoder(lat, lng, pre)))</lang>
- Output:
encoder(lat=51.433718, lng=-0.214126, pre=2) = 'gc' encoder(lat=51.433718, lng=-0.214126, pre=9) = 'gcpue5hp4' encoder(lat=57.649110, lng=10.407440, pre=11) = 'u4pruydqqvj' encoder(lat=57.649110, lng=10.407440, pre=22) = 'u4pruydqqvj8pr9yc27rjr'
Note: The precision can be increased but would need lattitude and longitude expressed with more precision than floats, e.g. fractions or decimals for more accurate results. (but the encoder function would not need changing due to Duck Typing).
Raku
Module based
Reference: Used this for verification. <lang perl6>#20200615 Raku programming solution
use Geo::Hash;
- task example 1 : Ireland, most of England and Wales, small part of Scotland
say geo-encode(51.433718e0, -0.214126e0, 2);
- task example 2 : the umpire's chair on Center Court at Wimbledon
say geo-encode(51.433718e0, -0.214126e0, 9);
- Lake Raku is an artificial lake in Tallinn, Estonia
- https://goo.gl/maps/MEBXXhiFbN8WMo5z8
say geo-encode(59.358639e0, 24.744778e0, 4);
- Village Raku is a development committee in north-western Nepal
- https://goo.gl/maps/33s7k2h3UrHCg8Tb6
say geo-encode(29.2021188e0, 81.5324561e0, 4);</lang>
- Output:
gc gcpue5hp4 ud99 tv1y
Roll your own
Alternately, a roll-your-own version that will work with any Real coordinate, not just floating point values, and thus can return ridiculous precision. The geo-decode routine returns the range in which the actual value will be found; converted here to the mid-point with the interval size. Probably better to specify an odd precision so the error interval ends up the same for both latitude and longitude.
<lang perl6>my @Geo32 = <0 1 2 3 4 5 6 7 8 9 b c d e f g h j k m n p q r s t u v w x y z>;
sub geo-encode ( Rat(Real) $latitude, Rat(Real) $longitude, Int $precision = 9 ) {
my @coord = $latitude, $longitude; my @range = [-90, 90], [-180, 180]; my $which = 1; my $value = ; while $value.chars < $precision * 5 { my $mid = @range[$which].sum / 2; $value ~= my $upper = +(@coord[$which] > $mid); @range[$which][not $upper] = $mid; $which = not $which; } @Geo32[$value.comb(5)».parse-base(2)].join;
}
sub geo-decode ( Str $geo ) {
my @range = [-90, 90], [-180, 180]; my $which = 1; my %Geo32 = @Geo32.antipairs; for %Geo32{$geo.comb}».fmt('%05b').join.comb { @range[$which][$_] = @range[$which].sum / 2; $which = not $which; } @range
}
- TESTING
for 51.433718, -0.214126, 2, # Ireland, most of England and Wales, small part of Scotland
51.433718, -0.214126, 9, # the umpire's chair on Center Court at Wimbledon 51.433718, -0.214126, 17, # likely an individual molecule of the chair 57.649110, 10.407440, 11, # Wikipedia test value - Råbjerg Mile in Denmark 59.358639, 24.744778, 7, # Lake Raku in Estonia 29.2021188, 81.5324561, 7 # Village Raku in Nepal -> $lat, $long, $precision { say "$lat, $long, $precision:\ngeo-encoded: ", my $enc = geo-encode $lat, $long, $precision; say 'geo-decoded: ', geo-decode($enc).map( {-.sum/2 ~ ' ± ' ~ (abs(.[0]-.[1])/2).Num.fmt('%.3e')} ).join(', ') ~ "\n";
}</lang>
51.433718, -0.214126, 2: geo-encoded: gc geo-decoded: 53.4375 ± 2.813e+00, -5.625 ± 5.625e+00 51.433718, -0.214126, 9: geo-encoded: gcpue5hp4 geo-decoded: 51.4337182 ± 2.146e-05, -0.21412611 ± 2.146e-05 51.433718, -0.214126, 17: geo-encoded: gcpue5hp4ebnf8unc geo-decoded: 51.43371800000523 ± 2.046e-11, -0.21412600000303 ± 2.046e-11 57.64911, 10.40744, 11: geo-encoded: u4pruydqqvj geo-decoded: 57.64911063 ± 6.706e-07, 10.407439694 ± 6.706e-07 59.358639, 24.744778, 7: geo-encoded: ud99ejf geo-decoded: 59.358444 ± 6.866e-04, 24.744644 ± 6.866e-04 29.2021188, 81.5324561, 7: geo-encoded: tv1ypk4 geo-decoded: 29.202347 ± 6.866e-04, 81.532974 ± 6.866e-04
Swift
<lang Swift>let base32 = "0123456789bcdefghjkmnpqrstuvwxyz" // no "a", "i", "l", or "o"
extension String {
subscript(i: Int) -> String { String(self[index(startIndex, offsetBy: i)]) }
}
struct Coordinate {
var latitude: Double var longitude: Double
func toString() -> String { var latitudeHemisphere = "" var longitudeHemisphere = ""
latitudeHemisphere = latitude < 0 ? " S" : " N" longitudeHemisphere = longitude < 0 ? " W" : " E"
return "\(abs(latitude))\(latitudeHemisphere), \(abs(longitude))\(longitudeHemisphere)" }
}
func encodeGeohash (for coordinate: Coordinate, withPrecision precision: Int = 9) -> String {
var latitudeRange = -90.0...90.0 var longitudeRange = -180...180.0
var hash = "" var hashVal = 0 var bits = 0 var even = true
while (hash.count < precision) { let val = even ? coordinate.longitude: coordinate.latitude var range = even ? longitudeRange : latitudeRange let mid = (range.lowerBound + range.upperBound) / 2
if (val > mid) { hashVal = (hashVal << 1) + 1 range = mid...range.upperBound
if even { longitudeRange = mid...longitudeRange.upperBound } else { latitudeRange = mid...latitudeRange.upperBound } } else { hashVal = (hashVal << 1) + 0 range = range.lowerBound...mid
if even { longitudeRange = longitudeRange.lowerBound...mid } else { latitudeRange = latitudeRange.lowerBound...mid } }
even = !even
if (bits < 4) { bits += 1 } else { bits = 0 hash += base32[hashVal] hashVal = 0 } } return hash
}
let coordinate1 = Coordinate(latitude: 51.433718, longitude: -0.214126) let coordinate2 = Coordinate(latitude: 57.649110, longitude: 10.407440)
print ("Geohash for: \(coordinate1.toString()), precision = 5 : \(encodeGeohash(for: coordinate, withPrecision: 5))") print ("Geohash for: \(coordinate1.toString()), precision = 9 : \(encodeGeohash(for: coordinate))") print ("Geohash for: \(coordinate2.toString()), precision = 11 : \(encodeGeohash(for: coordinate, withPrecision: 11))")
</lang>
- Output:
Geohash for: 51.433718 N, 0.214126 W, precision = 5 : gcpue Geohash for: 51.433718 N, 0.214126 W, precision = 9 : gcpue5hp4 Geohash for: 57.64911 N, 10.40744 E, precision = 11 : u4pruydqqvj
Wren
<lang ecmascript>import "/fmt" for Fmt
var gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"
var encodeGeohash = Fn.new { |location, prec|
var latRange = -90..90 var lngRange = -180..180 var hash = "" var hashVal = 0 var bits = 0 var even = true while (hash.count < prec) { var val = even ? location[1] : location[0] var rng = even ? lngRange : latRange var mid = (rng.from + rng.to) / 2 if (val > mid) { hashVal = hashVal*2 + 1 rng = mid..rng.to if (even) lngRange = mid..lngRange.to else latRange = mid..latRange.to } else { hashVal = hashVal * 2 if (even) lngRange = lngRange.from..mid else latRange = latRange.from..mid } even = !even if (bits < 4) { bits = bits + 1 } else { bits = 0 hash = hash + gBase32[hashVal] hashVal = 0 } } return hash
}
var data = [
[[51.433718, -0.214126], 2], [[51.433718, -0.214126], 9], [[57.64911, 10.40744 ], 11]
]
for (d in data) {
var geohash = encodeGeohash.call(d[0], d[1]) var loc = "[%(Fmt.f(9, d[0][0], 6)), %(Fmt.f(9, d[0][1], 6))]" System.print("geohash for %(loc), precision %(Fmt.d(-2, d[1])) = %(geohash)")
}</lang>
- Output:
geohash for [51.433718, -0.214126], precision 2 = gc geohash for [51.433718, -0.214126], precision 9 = gcpue5hp4 geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj