Base64 decode data

From Rosetta Code
Revision as of 10:50, 14 July 2019 by Thundergnat (talk | contribs) (Convert to a task.)
Task
Base64 decode data
You are encouraged to solve this task according to the task description, using any language you may know.

See Base64 encode data.

Now write a program that takes the output of the Base64 encode data task as input and regenerates the original file.

When working on the VBA implementation I found several 'solutions' on the net, including one from the software maker himself, that showed output with incorrect padding. Obviously, output with incorrect padding cannot be decoded correctly back to the original file.

C++

<lang cpp>#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Byte type used for both raw data and Base64 text throughout this program.
using ubyte = unsigned char;

// Standard Base64 alphabet; the position of a character is its 6-bit value.
const char* const BASE64 =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

std::vector<ubyte> encode(const std::vector<ubyte>& source) {

   auto it = source.cbegin();
   auto end = source.cend();
   std::vector<ubyte> sink;
   while (it != end) {
       auto b1 = *it++;
       int acc;
       sink.push_back(BASE64[b1 >> 2]);            // first output (first six bits from b1)
       acc = (b1 & 0x3) << 4;                      // last two bits from b1
       if (it != end) {
           auto b2 = *it++;
           acc |= (b2 >> 4);                       // first four bits from b2
           sink.push_back(BASE64[acc]);            // second output
           acc = (b2 & 0xF) << 2;                  // last four bits from b2
           if (it != end) {
               auto b3 = *it++;
               acc |= (b3 >> 6);                   // first two bits from b3
               sink.push_back(BASE64[acc]);        // third output
               sink.push_back(BASE64[b3 & 0x3F]);  // fouth output (final six bits from b3)
           } else {
               sink.push_back(BASE64[acc]);        // third output
               sink.push_back('=');                // fourth output (1 byte padding)
           }
       } else {
           sink.push_back(BASE64[acc]);            // second output
           sink.push_back('=');                    // third output (first padding byte)
           sink.push_back('=');                    // fourth output (second padding byte)
       }
   }
   return sink;

}

/**
 * Maps a Base64 alphabet character to its 6-bit value.
 *
 * @param val Character to look up.
 * @return 0-25 for 'A'-'Z', 26-51 for 'a'-'z', 52-61 for '0'-'9',
 *         62 for '+', 63 for '/', and -1 for anything else (including '=').
 */
int findIndex(ubyte val) {
    const int ch = val;

    if (ch >= 'A' && ch <= 'Z') return ch - 'A';
    if (ch >= 'a' && ch <= 'z') return ch - 'a' + 26;
    if (ch >= '0' && ch <= '9') return ch - '0' + 52;

    switch (ch) {
        case '+': return 62;
        case '/': return 63;
        default:  return -1;
    }
}

std::vector<ubyte> decode(const std::vector<ubyte>& source) {

   if (source.size() % 4 != 0) {
       throw new std::runtime_error("Error in size to the decode method");
   }
   auto it = source.cbegin();
   auto end = source.cend();
   std::vector<ubyte> sink;
   while (it != end) {
       auto b1 = *it++;
       auto b2 = *it++;
       auto b3 = *it++; // might be first padding byte
       auto b4 = *it++; // might be first or second padding byte
       auto i1 = findIndex(b1);
       auto i2 = findIndex(b2);
       int acc;
       acc = i1 << 2;          // six bits came from the first byte
       acc |= i2 >> 4;         // two bits came from the first byte
       sink.push_back(acc);    // output the first byte
       if (b3 != '=') {
           auto i3 = findIndex(b3);
           acc = (i2 & 0xF) << 4;  // four bits came from the second byte
           acc |= i3 >> 2;         // four bits came from the second byte
           sink.push_back(acc);    // output the second byte
           if (b4 != '=') {
               auto i4 = findIndex(b4);
               acc = (i3 & 0x3) << 6;  // two bits came from the third byte
               acc |= i4;              // six bits came from the third byte
               sink.push_back(acc);    // output the third byte
           }
       }
   }
   return sink;

}

int main() {

   using namespace std;
   string data = "VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLVBhdWwgUi5FaHJsaWNo";
   vector<ubyte> datav{ begin(data), end(data) };
   cout << data << "\n\n";
   auto decoded = decode(datav);
   std::for_each(cbegin(decoded), cend(decoded), [](char c) { cout << c; });
   cout << '\n';
   return 0;

}</lang>

Output:
VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLVBhdWwgUi5FaHJsaWNo

To err is human, but to really foul things up you need a computer.
    --Paul R.Ehrlich

D

Translation of: Perl 6

<lang d>import std.base64; import std.stdio;

// Prints the Base64 sample, a blank line, then the decoded text.
void main() {
    string encoded = "VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g=";

    writeln(encoded);
    writeln();

    // Base64.decode yields raw bytes; view them as characters for printing.
    char[] text = cast(char[]) Base64.decode(encoded);
    writeln(text);
}</lang>

Output:
VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g=

To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

Go

As images can no longer be uploaded to RC, I've encoded and decoded a string rather than the Rosetta Code icon. <lang go>package main

import (

   "encoding/base64"
   "fmt"

)

func main() {

   msg := "Rosetta Code Base64 decode data task"
   fmt.Println("Original :", msg)
   encoded := base64.StdEncoding.EncodeToString([]byte(msg))
   fmt.Println("\nEncoded  :", encoded)
   decoded, err := base64.StdEncoding.DecodeString(encoded)
   if err != nil {
       fmt.Println(err)
       return
   }
   fmt.Println("\nDecoded  :", string(decoded))

}</lang>

Output:
Original : Rosetta Code Base64 decode data task

Encoded  : Um9zZXR0YSBDb2RlIEJhc2U2NCBkZWNvZGUgZGF0YSB0YXNr

Decoded  : Rosetta Code Base64 decode data task

Jsish

See Base64_encode_data#Jsish for base64.jsi.

<lang javascript>/* Base64 decode, in Jsish */ var data = exec('jsish base64.jsi', {retAll:true}).data; // or use File.read('stdin'); var icon = Util.base64(data, true); File.write('rosetta-favicon.ico', icon);</lang>


Julia

Using an IOBuffer here, though not really needed to decode a string, shows how we could pipe a network stream or file through Julia's built-in Base64 decoder. <lang julia>using Base64

# In-memory stream standing in for a file or network source.
buffer = IOBuffer()

# Reading from this pipe decodes whatever Base64 text `buffer` supplies.
decoder = Base64DecodePipe(buffer)

write(buffer, "VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLVBhdWwgUi5FaHJsaWNo")

# Rewind so the decoder starts from the first written character.
seekstart(buffer)

println(read(decoder, String))

</lang>

Output:
To err is human, but to really foul things up you need a computer.
    --Paul R.Ehrlich

Kotlin

Translation of: D

<lang scala>import java.util.Base64

// Decodes a fixed Base64 sample and prints the resulting UTF-8 text.
fun main() {
    val encoded =
        "VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY29tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g="
    val bytes = Base64.getDecoder().decode(encoded)
    println(bytes.toString(Charsets.UTF_8))
}</lang>

Output:
To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

Perl

The MIME::Base64 module is to be preferred, but this works too. <lang perl>sub decode_base64 {

   # Decodes a Base64 string by rewriting it in uuencode form and letting
   # unpack("u") do the bit shuffling. Returns the decoded bytes.
   my($d) = @_;
   $d =~ tr!A-Za-z0-9+/!!cd;      # delete everything outside the Base64 alphabet (whitespace, '=' padding)
   $d =~ s/=+$//;                 # strip trailing padding (nothing left to strip after the tr above)
   $d =~ tr!A-Za-z0-9+/! -_!;     # map the Base64 alphabet onto the uuencode alphabet ' ' .. '_'
   my $r = '';
   while( $d =~ /(.{1,60})/gs ){
       # A uuencoded line starts with its decoded byte count, offset by 32.
       my $len = chr(32 + length($1)*3/4);
       $r .= unpack("u", $len . $1 );
   }
   $r;

}

# Sample input; the line breaks inside the heredoc are ignored by decode_base64.
$data = <<EOD;
J1R3YXMgYnJpbGxpZywgYW5kIHRoZSBzbGl0aHkgdG92ZXMKRGlkIGd5cmUgYW5kIGdpbWJsZSBp
biB0aGUgd2FiZToKQWxsIG1pbXN5IHdlcmUgdGhlIGJvcm9nb3ZlcywKQW5kIHRoZSBtb21lIHJh
dGhzIG91dGdyYWJlLgo=
EOD

print decode_base64($data) . "\n";</lang>

Output:
'Twas brillig, and the slithy toves
Did gyre and gimble in the wabe:
All mimsy were the borogoves,
And the mome raths outgrabe.

Perl 6

Works with: Rakudo version 2018.11

<lang perl6># Sample Base64 input; it contains whitespace that the decoder strips.
my $e64 = ' VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY2 9tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g= ';

# @base64map lists the 64 alphabet symbols in order; %base64 inverts it
# (symbol => position) and yields 0 for any key that is not present.
my @base64map = flat 'A' .. 'Z', 'a' .. 'z', ^10, '+', '/'; my %base64 is default(0) = @base64map.pairs.invert;

# Decodes Base64 text into a Buf, for demonstration only.
# Whitespace is removed, then each group of four symbols is rendered as
# four zero-padded 6-bit binary strings, joined, and re-split into 8-bit
# chunks that are parsed back into byte values.
sub base64-decode-slow ($enc) {

   my $buf = Buf.new;
   for $enc.subst(/\s/, '', :g).comb(4) -> $chunck {
       $buf.append: |(sprintf "%06d%06d%06d%06d", |$chunck.comb.map:
           {%base64{$_}.base(2)}).comb(8).map: {:2($_)};
   }
   $buf

}

say 'Slow:'; say base64-decode-slow($e64).decode('utf8');


# Of course, the above routine is slow and is only for demonstration purposes.
# For real code you should use a module, which is MUCH faster and heavily tested.

say "\nFast:"; use Base64::Native; say base64-decode($e64).decode('utf8');</lang>

Output:
Slow:
To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

Fast:
To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

Phix

<lang Phix>include builtins\base64.e
-- Round trip: encode a sample string, print it, then print the decoded form.
string s = "Rosetta Code Base64 decode data task"
string e = encode_base64(s)
?e
?decode_base64(e)</lang>

Output:
"Um9zZXR0YSBDb2RlIEJhc2U2NCBkZWNvZGUgZGF0YSB0YXNr"
"Rosetta Code Base64 decode data task"

Ruby

<lang ruby>require 'base64'

perl6_example =' VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY2 9tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g= ' puts Base64.decode64 perl6_example</lang>

Output:
To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

Seed7

The Seed7 library encoding.s7i defines the functions toBase64 and fromBase64.

<lang seed7>$ include "seed7_05.s7i";

 include "gethttp.s7i";
 include "encoding.s7i";

# Program entry point: round-trips the Rosetta Code icon through Base64
# and reports whether the decoded bytes equal the original.
const proc: main is func

 local
   var string: original is "";
   var string: encoded is "";
 begin
   # Presumably downloads the icon bytes over HTTP (getHttp comes from gethttp.s7i).
   original := getHttp("rosettacode.org/favicon.ico");
   encoded := toBase64(original);
   # The comparison result is appended to the message with <&.
   writeln("Is the Rosetta Code icon the same (byte for byte) encoded then decoded: " <&
           fromBase64(encoded) = original);
 end func;</lang>
Output:
Is the Rosetta Code icon the same (byte for byte) encoded then decoded: TRUE

Sidef

<lang ruby>var data = <<'EOT'
VG8gZXJyIGlzIGh1bWFuLCBidXQgdG8gcmVhbGx5IGZvdWwgdGhpbmdzIHVwIHlvdSBuZWVkIGEgY2
9tcHV0ZXIuCiAgICAtLSBQYXVsIFIuIEVocmxpY2g=
EOT

say data.decode_base64</lang>

Output:
To err is human, but to really foul things up you need a computer.
    -- Paul R. Ehrlich

zkl

Using shared libraries for cURL and message hashing: <lang zkl>var [const] MsgHash=Import("zklMsgHash"), Curl=Import("zklCurl");

icon:=Curl().get("http://rosettacode.org/favicon.ico"); //-->(Data(4,331),693,0)
icon=icon[0][icon[1],*]; // remove header
iconEncoded:=MsgHash.base64encode(icon);
iconDecoded:=MsgHash.base64decode(iconEncoded);
File("rosettaCodeIcon.ico","wb").write(iconDecoded); # eyeball checking says good
println("Is the Rosetta Code icon the same (byte for byte) encoded then decoded: ",

  icon==iconDecoded);</lang>
Output:
Is the Rosetta Code icon the same (byte for byte) encoded then decoded: True
Text based test:

<lang zkl>// Text round trip: encode a sample string, then print the original, the encoded text, its bytes in hex, and the decoded text.
msg,b64 := "Rosetta Code Base64 decode data task", MsgHash.base64encode(msg); println("Original: %s\nEncoded: %s\nBytes: %s\nDecoded: %s"

  .fmt(msg, b64.text, b64.bytes().apply("toString",16).concat(","),
       MsgHash.base64decode(b64).text));</lang>
Original: Rosetta Code Base64 decode data task
Encoded:  Um9zZXR0YSBDb2RlIEJhc2U2NCBkZWNvZGUgZGF0YSB0YXNr

Bytes:    55,6d,39,7a,5a,58,52,30,59,53,42,44,62,32,52,6c,49,45,4a,68,63,32,55,32,4e,43,42,6b,5a,57,4e,76,5a,47,55,67,5a,47,46,30,59,53,42,30,59,58,4e,72,a
Decoded:  Rosetta Code Base64 decode data task