Geohash

From Rosetta Code
Geohash is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Geohashes are used to represent standard latitude and longitude coordinates as single values in the form of a simple string -- using the digits (0-9) and the letters (B-Z excluding I, L, O). They can vary in length, with more characters in the string representing more precision.


Task

Generate a Geohash with a desired precision from a coordinate represented as an array of two floating point numbers, latitude and longitude. (Ideally double precision).


Example 1:
print (encodeGeohash (for: [51.433718, -0.214126], withPrecision: 2))
// Result: "gc" (all of Ireland, most of England and Wales, small part of Scotland)
Example 2:
print (encodeGeohash (for: [51.433718, -0.214126], withPrecision: 9))
// Result: "gcpue5hp4" (the umpire's chair on Center Court at Wimbledon)


From the Wikipedia page, geohashes can be "useful in database systems where queries on a single index are much easier or faster than multiple-index queries."


Extra credit

Provide a decode function to convert a geohash code back to latitude and longitude, expressed either as ranges or in median +/- deviation format.


Reference


Factor

Factor comes with the geohash vocabulary. See the implementation here.

Works with: Factor version 0.99 2020-03-02

<lang factor>USING: formatting generalizations geohash io kernel sequences ;

encode-geohash ( latitude longitude precision -- str )
   [ >geohash ] [ head ] bi* ;

! Encoding 51.433718 -0.214126 2 51.433718 -0.214126 9 57.649110 10.407440 11 [

   3dup encode-geohash
   "geohash for [%f, %f], precision %2d = %s\n" printf

] 3 3 mnapply nl

! Decoding "u4pruydqqvj" dup geohash> "coordinates for %s ~= [%f, %f]\n" printf</lang>

Output:
geohash for [51.433718, -0.214126], precision  2 = gc
geohash for [51.433718, -0.214126], precision  9 = gcpue5hp4
geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj

coordinates for u4pruydqqvj ~= [57.649110, 10.407439]

Go

Translation of: Swift

<lang go>package main

import (

   "fmt"
   "strings"

)

type Location struct{ lat, lng float64 }

func (loc Location) String() string { return fmt.Sprintf("[%f, %f]", loc.lat, loc.lng) }

type Range struct{ lower, upper float64 }

var gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"

func encodeGeohash(loc Location, prec int) string {

   latRange := Range{-90, 90}
   lngRange := Range{-180, 180}
   var hash strings.Builder
   hashVal := 0
   bits := 0
   even := true
   for hash.Len() < prec {
       val := loc.lat
       rng := latRange
       if even {
           val = loc.lng
           rng = lngRange
       }
       mid := (rng.lower + rng.upper) / 2
       if val > mid {
           hashVal = (hashVal << 1) + 1
           rng = Range{mid, rng.upper}
           if even {
               lngRange = Range{mid, lngRange.upper}
           } else {
               latRange = Range{mid, latRange.upper}
           }
       } else {
           hashVal <<= 1
           if even {
               lngRange = Range{lngRange.lower, mid}
           } else {
               latRange = Range{latRange.lower, mid}
           }
       }
       even = !even
       if bits < 4 {
           bits++
       } else {
           bits = 0
           hash.WriteByte(gBase32[hashVal])
           hashVal = 0
       }
   }
   return hash.String()

}

func main() {

   locs := []Location{
       {51.433718, -0.214126},
       {51.433718, -0.214126},
       {57.64911, 10.40744},
   }
   precs := []int{2, 9, 11}
   for i, loc := range locs {
       geohash := encodeGeohash(loc, precs[i])
       fmt.Printf("geohash for %v, precision %-2d = %s\n", loc, precs[i], geohash)
   }

}</lang>

Output:
geohash for [51.433718, -0.214126], precision 2  = gc
geohash for [51.433718, -0.214126], precision 9  = gcpue5hp4
geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj

Julia

Translation of: Python

<lang julia>const ch32 = "0123456789bcdefghjkmnpqrstuvwxyz" const bool2ch = Dict(string(i-1, base=2, pad=5) => ch for (i, ch) in enumerate(ch32)) const ch2bool = Dict(v => k for (k, v) in bool2ch)

function bisect(val, mn, mx, bits)

   mid = (mn + mx) / 2
   if val < mid
       bits <<= 1                        # push 0
       mx = mid                          # range lower half
   else
       bits = (bits << 1) | 1            # push 1
       mn = mid                          # range upper half
   end
   return mn, mx, bits

end

function encoder(lat, lng, pre)

   latmin, latmax = -90, 90
   lngmin, lngmax = -180, 180
   bits = Int128(0)
   for i in 0:5*pre-1
       if i % 2 != 0
           # odd bit: bisect latitude
           latmin, latmax, bits = bisect(lat, latmin, latmax, bits)
       else
           # even bit: bisect longitude
           lngmin, lngmax, bits = bisect(lng, lngmin, lngmax, bits)
       end
   end
   # Bits to characters
   b = string(bits, base=2, pad=5*pre)
   geo = [bool2ch[b[i*5+1:i*5+5]] for i in 0:pre-1]
   return prod(geo)

end

function decoder(geo)

   minmaxes, latlong = [[-90.0, 90.0], [-180.0, 180.0]], 2
   for c in geo, bit in ch2bool[c]
       minmaxes[latlong][bit == '1' ? 1 : 2] = sum(minmaxes[latlong]) / 2
       latlong = 3 - latlong
   end
   return minmaxes

end

for ((lat, lng), pre) in [([51.433718, -0.214126], 2),

                         ([51.433718, -0.214126],  9),
                         ([57.64911,  10.40744] , 11),
                         ([57.64911,  10.40744] , 22)]
   encoded = encoder(lat, lng, pre)
   println("encoder(lat=$lat, lng=$lng, pre=$pre) = ", encoded)
   println("decoded = ", decoder(encoded))

end

</lang>

Output:
encoder(lat=51.433718, lng=-0.214126, pre=2) = gc
decoded = [[50.625, 56.25], [-11.25, 0.0]]
encoder(lat=51.433718, lng=-0.214126, pre=9) = gcpue5hp4
decoded = [[51.43369674682617, 51.43373966217041], [-0.21414756774902344, -0.21410465240478516]]
encoder(lat=57.64911, lng=10.40744, pre=11) = u4pruydqqvj
decoded = [[57.649109959602356, 57.64911130070686], [10.407439023256302, 10.40744036436081]]
encoder(lat=57.64911, lng=10.40744, pre=22) = u4pruydqqvj8pr9yc27rjr
decoded = [[57.64911, 57.64911000000001], [10.407439999999998, 10.407440000000008]]

Phix

<lang Phix>constant gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"

function encode_geohash(sequence location, integer precision)

   sequence r = {{-90,90},{-180,180}}  -- lat/long
   integer ll = 2,                     --  " " "
           hashval = 0, bits = 0
   string hash = ""
   while length(hash) < precision do
       atom mid = sum(r[ll])/2,
            gt = location[ll]>mid
       hashval = hashval*2+gt
       r[ll][2-gt] = mid
       bits += 1
       if bits=5 then
           hash &= gBase32[hashval+1]
           {hashval,bits} = {0,0}
       end if
       ll = 3-ll   -- (1 <==> 2)
   end while
   return hash

end function

function decode_geohash(string hash) -- output is {{lat_lo,lat_hi},{long_lo,long_hi}}

   sequence r = {{-90,90},{-180,180}}  -- lat/long
   integer ll = 2                      --  " " "
   for h=1 to length(hash) do
       string b = sprintf("%05b",find(hash[h],gBase32)-1)
       for it=1 to 5 do
           r[ll][2-(b[it]='1')] = sum(r[ll])/2
           ll = 3-ll   -- (1 <==> 2)
       end for
   end for 
   return r

end function

sequence tests = {{{51.433718, -0.214126}, 2},

                 {{51.433718, -0.214126}, 9},
                 {{57.64911,  10.40744 }, 11},
                 {{57.64911,  10.40744 }, 22}}

for i=1 to length(tests) do

   {sequence location, integer precision} = tests[i]
   string geohash = encode_geohash(location, precision)
   printf(1,"geohash for %v, precision %d = %s\n",{location, precision, geohash})
   tests[i] = geohash

end for

printf(1,"\ndecode tests:\n") tests = append(tests,"ezs42") for i=1 to length(tests) do

   printf(1,"%-22s ==> %v\n",{tests[i],decode_geohash(tests[i])})

end for</lang>

Output:
geohash for {51.433718,-0.214126}, precision 2 = gc
geohash for {51.433718,-0.214126}, precision 9 = gcpue5hp4
geohash for {57.64911,10.40744}, precision 11 = u4pruydqqvj
geohash for {57.64911,10.40744}, precision 22 = u4pruydqqvj8pr9yc27rjr

decode tests:
gc                     ==> {{50.625,56.25},{-11.25,0}}
gcpue5hp4              ==> {{51.43369675,51.43373966},{-0.2141475677,-0.2141046524}}
u4pruydqqvj            ==> {{57.64910996,57.6491113},{10.40743902,10.40744036}}
u4pruydqqvj8pr9yc27rjr ==> {{57.64911,57.64911},{10.40744,10.40744}}
ezs42                  ==> {{42.58300781,42.62695312},{-5.625,-5.581054688}}

Not surprisingly, given the area it covers, "gc" is not even accurate to one significant digit, but a precision of 9 is accurate to 5 or 6 significant decimal digits, 11 to 6 or 7 digits, and 22 exceeds the natural 10 sig digs of %v. Note that 32-bit gives a last character of 'q' for the precision 22 test, for obvious reasons. The above results are from using the 64-bit interpreter.

Python

<lang python>ch32 = "0123456789bcdefghjkmnpqrstuvwxyz" bool2ch = {f"{i:05b}": ch for i, ch in enumerate(ch32)} ch2bool = {v : k for k, v in bool2ch.items()}

def bisect(val, mn, mx, bits):

   mid = (mn + mx) / 2
   if val < mid:
       bits <<= 1                        # push 0
       mx = mid                          # range lower half
   else:
       bits = bits << 1 | 1              # push 1
       mn = mid                          # range upper half
   return mn, mx, bits

def encoder(lat, lng, pre):

   latmin, latmax = -90, 90
   lngmin, lngmax = -180, 180
   bits = 0
   for i in range(pre * 5):
       if i % 2:
           # odd bit: bisect latitude
           latmin, latmax, bits = bisect(lat, latmin, latmax, bits)
       else:
           # even bit: bisect longitude
           lngmin, lngmax, bits = bisect(lng, lngmin, lngmax, bits)
   # Bits to characters
   b = f"{bits:0{pre * 5}b}"
   geo = (bool2ch[b[i*5: (i+1)*5]] for i in range(pre))
   return .join(geo)

def decoder(geo):

   minmaxes, latlong = [[-90.0, 90.0], [-180.0, 180.0]], True
   for c in geo:
       for bit in ch2bool[c]:
           minmaxes[latlong][bit != '1'] = sum(minmaxes[latlong]) / 2
           latlong = not latlong
   return minmaxes

if __name__ == '__main__':

   for (lat, lng), pre in [([51.433718, -0.214126],  2),
                           ([51.433718, -0.214126],  9),
                           ([57.64911,  10.40744] , 11),
                           ([57.64911,  10.40744] , 22)]:
       print("encoder(lat=%f, lng=%f, pre=%i) = %r"
             % (lat, lng, pre, encoder(lat, lng, pre)))</lang>
Output:
encoder(lat=51.433718, lng=-0.214126, pre=2) = 'gc'
encoder(lat=51.433718, lng=-0.214126, pre=9) = 'gcpue5hp4'
encoder(lat=57.649110, lng=10.407440, pre=11) = 'u4pruydqqvj'
encoder(lat=57.649110, lng=10.407440, pre=22) = 'u4pruydqqvj8pr9yc27rjr'

Note: The precision can be increased but would need lattitude and longitude expressed with more precision than floats, e.g. fractions or decimals for more accurate results. (but the encoder function would not need changing due to Duck Typing).

Raku

Module based

Reference: Used this for verification. <lang perl6>#20200615 Raku programming solution

use Geo::Hash;

  1. task example 1 : Ireland, most of England and Wales, small part of Scotland

say geo-encode(51.433718e0, -0.214126e0, 2);

  1. task example 2 : the umpire's chair on Center Court at Wimbledon

say geo-encode(51.433718e0, -0.214126e0, 9);

  1. Lake Raku is an artificial lake in Tallinn, Estonia
  2. https://goo.gl/maps/MEBXXhiFbN8WMo5z8

say geo-encode(59.358639e0, 24.744778e0, 4);

  1. Village Raku is a development committee in north-western Nepal
  2. https://goo.gl/maps/33s7k2h3UrHCg8Tb6

say geo-encode(29.2021188e0, 81.5324561e0, 4);</lang>

Output:
gc
gcpue5hp4
ud99
tv1y

Roll your own

Alternately, a roll-your-own version that will work with any Real coordinate, not just floating point values, and thus can return ridiculous precision. The geo-decode routine returns the range in which the actual value will be found; converted here to the mid-point with the interval size. Probably better to specify an odd precision so the error interval ends up the same for both latitude and longitude.

<lang perl6>my @Geo32 = <0 1 2 3 4 5 6 7 8 9 b c d e f g h j k m n p q r s t u v w x y z>;

sub geo-encode ( Rat(Real) $latitude, Rat(Real) $longitude, Int $precision = 9 ) {

   my @coord = $latitude, $longitude;
   my @range = [-90, 90], [-180, 180];
   my $which = 1;
   my $value = ;
   while $value.chars < $precision * 5 {
       my $mid = @range[$which].sum / 2;
       $value ~= my $upper = +(@coord[$which] > $mid);
       @range[$which][not $upper] = $mid;
       $which = not $which;
   }
   @Geo32[$value.comb(5)».parse-base(2)].join;

}

sub geo-decode ( Str $geo ) {

    my @range = [-90, 90], [-180, 180];
    my $which = 1;
    my %Geo32 = @Geo32.antipairs;
    for %Geo32{$geo.comb}».fmt('%05b').join.comb {
        @range[$which][$_] = @range[$which].sum / 2;
        $which = not $which;
    }
    @range

}

  1. TESTING

for 51.433718, -0.214126, 2, # Ireland, most of England and Wales, small part of Scotland

   51.433718,   -0.214126,  9, # the umpire's chair on Center Court at Wimbledon
   51.433718,   -0.214126, 17, # likely an individual molecule of the chair
   57.649110,   10.407440, 11, # Wikipedia test value - Råbjerg Mile in Denmark
   59.358639,   24.744778,  7, # Lake Raku in Estonia
   29.2021188, 81.5324561,  7  # Village Raku in Nepal
 -> $lat, $long, $precision {
    say "$lat, $long, $precision:\ngeo-encoded: ",
    my $enc = geo-encode $lat, $long, $precision;
    say 'geo-decoded: ', geo-decode($enc).map( {-.sum/2 ~ ' ± ' ~
         (abs(.[0]-.[1])/2).Num.fmt('%.3e')} ).join(',  ') ~ "\n";

}</lang>

51.433718, -0.214126, 2:
geo-encoded: gc
geo-decoded: 53.4375 ± 2.813e+00,  -5.625 ± 5.625e+00

51.433718, -0.214126, 9:
geo-encoded: gcpue5hp4
geo-decoded: 51.4337182 ± 2.146e-05,  -0.21412611 ± 2.146e-05

51.433718, -0.214126, 17:
geo-encoded: gcpue5hp4ebnf8unc
geo-decoded: 51.43371800000523 ± 2.046e-11,  -0.21412600000303 ± 2.046e-11

57.64911, 10.40744, 11:
geo-encoded: u4pruydqqvj
geo-decoded: 57.64911063 ± 6.706e-07,  10.407439694 ± 6.706e-07

59.358639, 24.744778, 7:
geo-encoded: ud99ejf
geo-decoded: 59.358444 ± 6.866e-04,  24.744644 ± 6.866e-04

29.2021188, 81.5324561, 7:
geo-encoded: tv1ypk4
geo-decoded: 29.202347 ± 6.866e-04,  81.532974 ± 6.866e-04

Swift

<lang Swift>let base32 = "0123456789bcdefghjkmnpqrstuvwxyz" // no "a", "i", "l", or "o"

extension String {

 subscript(i: Int) -> String {
   String(self[index(startIndex, offsetBy: i)])
 }

}

struct Coordinate {

 var latitude: Double
 var longitude: Double
 func toString() -> String {
   var latitudeHemisphere = ""
   var longitudeHemisphere = ""
   latitudeHemisphere = latitude < 0 ? " S" : " N"
   longitudeHemisphere = longitude < 0 ? " W" : " E"
   return "\(abs(latitude))\(latitudeHemisphere), \(abs(longitude))\(longitudeHemisphere)"
 }

}

func encodeGeohash (for coordinate: Coordinate, withPrecision precision: Int = 9) -> String {

 var latitudeRange = -90.0...90.0
 var longitudeRange = -180...180.0
 var hash = ""
 var hashVal = 0
 var bits = 0
 var even = true
 while (hash.count < precision) {
   let val     = even ? coordinate.longitude: coordinate.latitude
   var range   = even ? longitudeRange : latitudeRange
   let mid     = (range.lowerBound + range.upperBound) / 2
   if (val > mid) {
     hashVal = (hashVal << 1) + 1
     range = mid...range.upperBound
     if even { longitudeRange = mid...longitudeRange.upperBound }
     else    {  latitudeRange = mid...latitudeRange.upperBound }
   } else {
     hashVal = (hashVal << 1) + 0
     range   = range.lowerBound...mid
     if even { longitudeRange = longitudeRange.lowerBound...mid }
     else    {  latitudeRange =  latitudeRange.lowerBound...mid }
   }
   even = !even
   if (bits < 4) {
     bits += 1
   } else {
     bits = 0
     hash += base32[hashVal]
     hashVal = 0
   }
 }
 return hash

}

let coordinate1 = Coordinate(latitude: 51.433718, longitude: -0.214126) let coordinate2 = Coordinate(latitude: 57.649110, longitude: 10.407440)

print ("Geohash for: \(coordinate1.toString()), precision = 5 : \(encodeGeohash(for: coordinate, withPrecision: 5))") print ("Geohash for: \(coordinate1.toString()), precision = 9 : \(encodeGeohash(for: coordinate))") print ("Geohash for: \(coordinate2.toString()), precision = 11 : \(encodeGeohash(for: coordinate, withPrecision: 11))")

</lang>

Output:
Geohash for: 51.433718 N, 0.214126 W, precision = 5 : gcpue
Geohash for: 51.433718 N, 0.214126 W, precision = 9 : gcpue5hp4
Geohash for: 57.64911 N, 10.40744 E, precision = 11 : u4pruydqqvj

Wren

Translation of: Swift
Library: Wren-fmt

<lang ecmascript>import "/fmt" for Fmt

var gBase32 = "0123456789bcdefghjkmnpqrstuvwxyz"

var encodeGeohash = Fn.new { |location, prec|

   var latRange = -90..90
   var lngRange = -180..180
   var hash = ""
   var hashVal = 0
   var bits = 0
   var even = true
   while (hash.count < prec) {
       var val = even ? location[1] : location[0]
       var rng = even ? lngRange : latRange
       var mid = (rng.from + rng.to) / 2
       if (val > mid) {
           hashVal = hashVal*2 + 1
           rng = mid..rng.to
           if (even) lngRange = mid..lngRange.to else latRange = mid..latRange.to
       } else {
           hashVal = hashVal * 2
           if (even) lngRange = lngRange.from..mid else latRange = latRange.from..mid
       }
       even = !even
       if (bits < 4) {
           bits = bits + 1
       } else {
           bits = 0
           hash = hash + gBase32[hashVal]
           hashVal = 0
       }
   }
   return hash

}

var data = [

   [[51.433718, -0.214126], 2],
   [[51.433718, -0.214126], 9],
   [[57.64911,  10.40744 ], 11]

]

for (d in data) {

   var geohash = encodeGeohash.call(d[0], d[1])
   var loc = "[%(Fmt.f(9, d[0][0], 6)), %(Fmt.f(9, d[0][1], 6))]"
   System.print("geohash for %(loc), precision %(Fmt.d(-2, d[1])) = %(geohash)")

}</lang>

Output:
geohash for [51.433718, -0.214126], precision 2  = gc
geohash for [51.433718, -0.214126], precision 9  = gcpue5hp4
geohash for [57.649110, 10.407440], precision 11 = u4pruydqqvj