SHA-256 Merkle tree

From Rosetta Code
Revision as of 20:33, 12 December 2021 by rosettacode>VincentArm (add task to arm assembly raspberry pi)
Task
SHA-256 Merkle tree
You are encouraged to solve this task according to the task description, using any language you may know.

As described in its documentation, Amazon S3 Glacier requires that all uploaded files come with a checksum computed as a Merkle Tree using SHA-256.

Specifically, the SHA-256 hash is computed for each 1MiB block of the file. And then, starting from the beginning of the file, the raw hashes of consecutive blocks are paired up and concatenated together, and a new hash is computed from each concatenation. Then these are paired up and concatenated and hashed, and the process continues until there is only one hash left, which is the final checksum. The hexadecimal representation of this checksum is the value that must be included with the AWS API call to upload the object (or complete a multipart upload).

Implement this algorithm in your language; you can use the code from the SHA-256 task for the actual hash computations. For better manageability and portability, build the tree using a smaller block size of only 1024 bytes, and demonstrate it on the RosettaCode title image with that block size. The final result should be the hexadecimal digest value a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c.

ARM Assembly

Works with: as version Raspberry Pi

<lang ARM Assembly> /* ARM assembly Raspberry PI */ /* program merkleRoot.s */

/* REMARK 1 : this program use routines in a include file

  see task Include a file language arm assembly 
  for the routine affichageMess conversion10 
  see at end of this program the instruction include */

/* for constantes see task include a file in arm assembly */ /************************************/ /* Constantes */ /************************************/ .include "../constantes.inc" .equ READ, 3 .equ OPEN, 5

.equ O_RDWR, 0x0002 @ open for reading and writing

.equ BUFFERSIZE, 65535 @ file buffer .equ LGBLOCK, 1024 @ block length

.equ LGHASH, 32 @ hash length .equ NBELEMENTS, 40 @ array size .equ LGZONETRAV, 2048 @ process array size


/*******************************************/ /* Structures */ /********************************************/ /* structure variables hash compute */

   .struct  0

var_a: @ a

   .struct  var_a + 4

var_b: @ b

   .struct  var_b + 4

var_c: @ c

   .struct  var_c + 4

var_d: @ d

   .struct  var_d + 4

var_e: @ e

   .struct  var_e + 4

var_f: @ f

   .struct  var_f + 4

var_g: @ g

   .struct  var_g + 4

var_h: @ h

   .struct  var_h + 4

/* structure linkedlist*/

   .struct  0

@llist_next: @ next element @ .struct llist_next + 4 @llist_value: @ element value @ .struct llist_value + 4 @llist_fin: /***********************************/ /* Initialized data */ /***********************************/ .data szFileName: .asciz "title.png" szCarriageReturn: .asciz "\n" szMessErreur: .asciz "Error detected.\n" szMessNbHash: .asciz "Start hash number : @ \n" .align 4 /* array constantes Hi */ tbConstHi: .int 0x6A09E667 @ H0

                    .int 0xBB67AE85       @ H1
                    .int 0x3C6EF372       @ H2
                    .int 0xA54FF53A       @ H3
                    .int 0x510E527F       @ H4
                    .int 0x9B05688C       @ H5
                    .int 0x1F83D9AB       @ H6
                    .int 0x5BE0CD19       @ H7

/* array 64 constantes Kt */ tbConstKt:

 .int 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
 .int 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
 .int 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
 .int 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
 .int 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
 .int 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
 .int 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
 .int 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

/***********************************/ /* UnInitialized data */ /***********************************/ .bss sBuffer: .skip BUFFERSIZE @ file buffer .align 4 iNbBlocs: .skip 4 iNbHash: .skip 4 sZoneConv: .skip 24 sZoneTrav: .skip LGZONETRAV HashResult: .skip LGHASH + 4 HashProcess: .skip LGHASH * 2 .align 8 tbListHash: .skip LGHASH * NBELEMENTS tbH: .skip 4 * 8 @ 8 variables H tbabcdefgh: .skip 4 * 8 tbW: .skip 4 * 64 @ 64 words W

/***********************************/ /* code section */ /***********************************/ .text .global main main:

   ldr r0,iAdrszFileName               @ File name
   mov r1,#O_RDWR                      @  flags
   mov r2,#0                           @ mode
   mov r7,#OPEN                        @ open file
   svc #0 
   cmp r0,#0                           @ error ?
   ble error
   mov r12,r0                          @ save fd
   ldr r1,iAdrsBuffer
   mov r2,#BUFFERSIZE
   mov r7,#READ                        @ call system read file
   svc 0 
   cmp r0,#0                           @ error read ?
   ble error
   mov r7,r0                           @ number of read characters
   ldr r6,iAdrsBuffer
   mov r5,#0                           @ counter characters block

1:

   add r0,r6,r5                        @ start address of each block
   mov r1,#LGBLOCK
   bl computeSHA256
   bl storeHash                        @ store hash in start array
   cmp r0,#-1
   beq error
   add r5,#LGBLOCK                     @ new buffer offset
   sub r0,r7,r5                        @ 
   cmp r0,#LGBLOCK                     @ last block
   bge 1b                              @ and loop 
   sub r1,r7,r5                        @ length last block
   add r0,r6,r5                        @ address last block
   bl computeSHA256
   bl storeHash
   cmp r0,#-1
   beq error
   ldr r0,iAdriNbHash
   ldr r0,[r0]
   ldr r1,iAdrsZoneConv
   bl conversion10
   ldr r0,iAdrszMessNbHash
   ldr r1,iAdrsZoneConv
   bl strInsertAtCharInc
   bl affichageMess
   ldr r0,iAdrtbListHash
   ldr r1,iAdrHashResult
   
   bl calculerMerkleRoot
   ldr r0,iAdrHashResult
   bl displaySHA256                   @ display résult
   

end:

   mov r0,r12                          @ FD
   mov r7, #CLOSE                      @ call system close file
   svc #0 
   cmp r0,#0
   blt error
   mov r0,#0                           @ return code
   b 100f

error:

   ldr r1,iAdrszMessErreur             @ error message
   bl   displayError
   mov r0,#1                           @ return error code

100: @ standard end of the program

   mov r7, #EXIT                       @ request to exit program
   svc 0                               @ perform system call

iAdrsBuffer: .int sBuffer iAdrszFileName: .int szFileName iAdrszMessErreur: .int szMessErreur iAdrszCarriageReturn: .int szCarriageReturn iAdrHashResult: .int HashResult iAdrszMessNbHash: .int szMessNbHash /******************************************************************/ /* store hash in start array */ /******************************************************************/ /* r0 hash address */ storeHash:

   push {r1-r5,lr} 
   ldr r2,iAdriNbHash           @ number element counter
   ldr r3,[r2]
   ldr r1,iAdrtbListHash        @ address array hash
   mov r4,#LGHASH               @ hash length 
   mla r5,r4,r3,r1              @ compute store address
   mov r1,#0                    @ counter 

1:

   ldr r4,[r0,r1]               @ load four bytes
   str r4,[r5,r1]               @ store four bytes
   add r1,#4
   cmp r1,#LGHASH               @ 32 bytes ?
   blt 1b                       @ no -> loop
   add r3,#1
   cmp r3,#NBELEMENTS
   movge r0,#-1                 @ error ?
   bge 100f
   str r3,[r2]                  @ store new counter hash
   

100:

   pop {r1-r5,lr}               @ restaur registers
   bx lr 

iAdriNbHash: .int iNbHash iAdrtbListHash: .int tbListHash /******************************************************************/ /* compute hash root Merkle */ /******************************************************************/ @ r0 start array hash address @ r1 result array address (32 bytes) calculerMerkleRoot:

   push {r1-r12,lr}         @ save  registers 
   mov r10,r1
   mov r12,sp               @ save stack adresse
   ldr r3,iAdriNbHash
   ldr r3,[r3]
   cmp r3,#0                @ 0 hash ?
   moveq r0,#-1             @ error
   beq 100f
   mov r4,#LGHASH * 2
   mul r1,r3,r4             @ compute hash size * blocks number * 2  
   sub sp,sp,r1             @ reserve array
   mov fp,sp                @ address previousTreeLayer
   lsr r1,#1                @ reserve size / 2
   add r7,fp,r1             @ address TreeLayer
   mov r2,#0                @ counter
   mov r4,#LGHASH

1:

   mul r1,r2,r4
   add r6,r0,r1
   add r8,fp,r1
   mov r5,#0

2: @ loop copying 32 octets hash

   ldr r9,[r6,r5]
   str r9,[r8,r5]
   add r5,r5,#4             @ count
   cmp r5,#LGHASH
   blt 2b
   add r2,#1                @ next hash block
   cmp r2,r3                @ maxi ?
   blt 1b 
   mov r0,fp
   mov r2,#0               @ indice TreeLayer
   mov r5,#0               @ indice layer

3: @ loop

   cmp r3,#1               @ one hash ?
   beq 12f                 @ yes -> end
   sub r3,#1
   mov r4,#LGHASH
   mla r8,r2,r4,fp         @ address hash 1
   mov r9,r7               @ raz TreeLayer

4:

   cmp r2,r3
   bgt 11f                 @ end loop ?
   blt 5f
   mov r0,r8               @ last odd hash
   add r2,#1               @ no concatenation 
   b 9f

5: @ other hashes

   add r6,r8,#LGHASH       @ address hash  N + 1

6:

   add r2,#1
   ldr r1,iAdrHashProcess  @ address array hash concatenation
   mov r0,#0

7: @ loop copy element N

   ldr r4,[r8,r0]
   rev r4,r4               @ inversion byte in word
   str r4,[r1,r0]
   add r0,r0,#4
   cmp r0,#LGHASH
   blt 7b
   add r1,r1,#LGHASH
   mov r0,#0

8: @ loop copy element N + 1

   ldr r4,[r6,r0]
   rev r4,r4               @ inversion byte in word
   str r4,[r1,r0]
   add r0,r0,#4
   cmp r0,#LGHASH
   blt 8b
   
   ldr r0,iAdrHashProcess
   mov r1,#LGHASH * 2
   bl computeSHA256        @ calcul du hash complet

9:

   mov r1,#0

10: @ loop copy hash in treelayer

   ldr r4,[r0,r1]
   str r4,[r9,r1]
   add r1,r1,#4
   cmp r1,#LGHASH
   blt 10b
   mov r1,r9
   add r2,r2,#1           @ incremente counter previousTreeLayer
   add r5,r5,#1           @ incremente counter treeLayer
   add r9,#LGHASH         @ next address treeLayer
   add r8,r6,#LGHASH      @ maj element N avec N + 2 
   b 4b
   

11:

   mov r0,fp
   mov fp,r7             @ treelayer in previous 
   mov r7,r0
   mov r3,r5             @ counter previous = counter treelayer
   mov r5,#0             @ raz treelayer
   mov r2,#0             @ raz previousTreeLayer
   b 3b                  @ and loop 
   

12: @ end process

   mov r1,fp
   mov r2,#0

13: @ loop copy result

   ldr r3,[r1,r2]
   str r3,[r10,r2]
   add r2,r2,#4
   cmp r2,#LGHASH
   blt 13b
   mov r0,r10            @ return address result 
   b 100f

99: @ error

  mov r0,#-1

100:

   mov sp,r12            @ restaur stack
   pop {r1-r12,lr}       @ restaur registers
   bx lr                 @ return

iAdrHashProcess: .int HashProcess /******************************************************************/ /* compute SHA1 */ /******************************************************************/ /* r0 contains the address of the message */ /* r1 contains string length */ computeSHA256:

   push {r1-r12,lr}         @ save  registres
   mov r5,r1                @ counter length 
   ldr r1,iAdrtbH
   mov r3,#0
   mov r4,#0

razTB:

   str r4,[r1,r3]
   add r3,#4
   cmp r3,#8 + 8 + 64
   blt razTB
   ldr r1,iAdrsZoneTrav
   mov r3,#0
   mov r4,#0

razZone:

   str r4,[r1,r3]
   add r3,#4
   cmp r3,#LGZONETRAV
   blt razZone
   @vidregtit apresraz
   mov r2,#0

debCopy: @ copy string in work area

   ldrb r3,[r0,r2]
   strb r3,[r1,r2]
   cmp r2,r5
   addne r2,r2,#1
   bne debCopy
   @vidregtit aprescopie
   lsl r6,r2,#3             @ initial message length in bits 
   mov r3,#0b10000000       @ add bit 1 at end of string
   strb r3,[r1,r2]
   add r2,r2,#1             @ length in bytes
   lsl r4,r2,#3             @ length in bits
   mov r3,#0

addZeroes:

   lsr r5,r2,#6
   lsl r5,r5,#6
   sub r5,r2,r5
   cmp r5,#56
   beq storeLength          @ yes -> end add
   strb r3,[r1,r2]          @ add zero at message end
   add r2,#1                @ increment lenght bytes 
   add r4,#8                @ increment length in bits
   b addZeroes

storeLength:

   add r2,#4                @ add four bytes
   rev r6,r6                @ inversion bits initials message length
   str r6,[r1,r2]           @ and store at end
   ldr r7,iAdrtbConstHi     @ constantes H address
   ldr r4,iAdrtbH           @ start area H
   mov r5,#0

loopConst: @ init array H with start constantes

   ldr r6,[r7,r5,lsl #2]    @ load constante
   str r6,[r4,r5,lsl #2]    @ and store
   add r5,r5,#1
   cmp r5,#8
   blt loopConst
                            @ split into block of 64 bytes
   add r2,#4                @  TODO : à revoir
   lsr r4,r2,#6             @ blocks number
   ldr r0,iAdriNbBlocs
   str r4,[r0]              @ save block maxi
   mov r7,#0                @ n° de block et r1 contient l adresse zone de travail

loopBlock: @ begin loop of each block of 64 bytes

   mov r0,r7
   bl inversion             @ inversion each word because little indian
   ldr r3,iAdrtbW           @ working area W address
   mov r6,#0                @ indice t
                            /* r2  address begin each block */
   ldr r1,iAdrsZoneTrav
   add r2,r1,r7,lsl #6      @  compute block begin  indice * 4 * 16
   @vidregtit avantloop
   @mov r0,r2
   @vidmemtit  verifBloc r0 10

loopPrep: @ loop for expand 80 words

   cmp r6,#15               @ 
   bgt expand1
   ldr r0,[r2,r6,lsl #2]    @ load byte message
   str r0,[r3,r6,lsl #2]    @ store in first 16 block 
   b expandEnd

expand1:

   sub r8,r6,#2
   ldr r9,[r3,r8,lsl #2]
   ror r10,r9,#17           @ fonction e1 (256)
   ror r11,r9,#19
   eor r10,r10,r11
   lsr r11,r9,#10
   eor r10,r10,r11
   sub r8,r6,#7
   ldr r9,[r3,r8,lsl #2]
   add r9,r9,r10            @ + w - 7
   sub r8,r6,#15
   ldr r10,[r3,r8,lsl #2]
   ror r11,r10,#7          @ fonction e0 (256)
   ror r12,r10,#18
   eor r11,r12
   lsr r12,r10,#3
   eor r10,r11,r12
   add r9,r9,r10
   sub r8,r6,#16
   ldr r11,[r3,r8,lsl #2]
   add r9,r9,r11
   str r9,[r3,r6,lsl #2] 

expandEnd:

   add r6,r6,#1
   cmp r6,#64                 @ 64 words ?
   blt loopPrep               @ and loop


   /* COMPUTING THE MESSAGE DIGEST */
   /* r1  area H constantes address */
   /* r3  working area W address  */
   /* r5  address constantes K   */
   /* r6  counter t */
   /* r7  block counter */
   /* r8  addresse variables a b c d e f g h  */
   @ldr r0,iAdrtbW
   @vidmemtit  verifW80 r0 20
                              @ init variable a b c d e f g h
   ldr r0,iAdrtbH
   ldr r8,iAdrtbabcdefgh
   mov r1,#0

loopInita:

   ldr r9,[r0,r1,lsl #2]
   str r9,[r8,r1,lsl #2]
   add r1,r1,#1
   cmp r1,#8
   blt loopInita


   ldr r1,iAdrtbConstHi
   ldr r5,iAdrtbConstKt
   mov r6,#0

loop64T: @ begin loop 64 t

   ldr r9,[r8,#var_h]
   ldr r10,[r8,#var_e]      @ calcul T1
   ror r11,r10,#6           @ fonction sigma 1
   ror r12,r10,#11
   eor r11,r12
   ror r12,r10,#25
   eor r11,r12
   add r9,r9,r11             @ h + sigma1 (e)
   ldr r0,[r8,#var_f]        @  fonction ch  x and y xor (non x and z)
   ldr r4,[r8,#var_g]
   and r11,r10,r0
   mvn r12,r10
   and r12,r12,r4
   eor r11,r12
   add r9,r9,r11             @ h + sigma1 (e) + ch (e,f,g)
   ldr r0,[r5,r6,lsl #2]     @ load constantes k0
   add r9,r9,r0
   ldr r0,[r3,r6,lsl #2]     @ Wt
   add r9,r9,r0
                             @ calcul T2
   ldr r10,[r8,#var_a]       @ fonction sigma 0
   ror r11,r10,#2
   ror r12,r10,#13
   eor r11,r11,r12
   ror r12,r10,#22
   eor r11,r11,r12
   ldr r2,[r8,#var_b]
   ldr r4,[r8,#var_c]
                             @ fonction maj x and y xor x and z xor y and z
   and r12,r10,r2
   and r0,r10,r4
   eor r12,r12,r0
   and r0,r2,r4
   eor r12,r12,r0            @
   add r12,r12,r11           @ T2
                             @ compute variables
   ldr r4,[r8,#var_g]
   str r4,[r8,#var_h]
   ldr r4,[r8,#var_f]
   str r4,[r8,#var_g]
   ldr r4,[r8,#var_e]
   str r4,[r8,#var_f]
   ldr r4,[r8,#var_d]
   add r4,r4,r9              @ add T1
   str r4,[r8,#var_e]
   ldr r4,[r8,#var_c]
   str r4,[r8,#var_d]
   ldr r4,[r8,#var_b]
   str r4,[r8,#var_c]
   ldr r4,[r8,#var_a]
   str r4,[r8,#var_b]
   add r4,r9,r12             @ add T1 T2
   str r4,[r8,#var_a]
   mov r0,r8
   add r6,r6,#1              @ increment t
   cmp r6,#64
   blt loop64T
                             @ End block
   ldr r0,iAdrtbH            @ start area H
   mov r10,#0

loopStoreH:

   ldr r9,[r8,r10,lsl #2]
   ldr r3,[r0,r10,lsl #2]
   add r3,r9
   str r3,[r0,r10,lsl #2]    @ store variables in H0
   add r10,r10,#1
   cmp r10,#8
   blt loopStoreH
                             @ other bloc
   add r7,#1                 @ increment block
   ldr r0,iAdriNbBlocs
   ldr r4,[r0]               @ restaur maxi block
   cmp r7,r4                 @ maxi ?
   blt loopBlock             @  loop other block
   ldr r0,iAdrtbH            @ return result area H

100:

   pop {r1-r12,lr}           @ restaur registers
   bx lr                     @ return  

iAdrtbConstHi: .int tbConstHi iAdrtbConstKt: .int tbConstKt iAdrtbH: .int tbH iAdrtbW: .int tbW iAdrtbabcdefgh: .int tbabcdefgh iAdriNbBlocs: .int iNbBlocs iAdrsZoneTrav: .int sZoneTrav /******************************************************************/ /* inversion des mots de 32 bits d un bloc */ /******************************************************************/ /* r0 contains N° block */ inversion:

   push {r1-r3,lr}            @ save registers 
   ldr r1,iAdrsZoneTrav
   add r1,r0,lsl #6           @ debut du bloc
   mov r2,#0

1: @ start loop

   ldr r3,[r1,r2,lsl #2]
   rev r3,r3
   str r3,[r1,r2,lsl #2]
   add r2,r2,#1
   cmp r2,#16
   blt 1b

100:

   pop {r1-r3,lr}             @ restaur registres 
   bx lr                      @return

/******************************************************************/ /* display hash SHA256 */ /******************************************************************/ /* r0 contains the address of hash */ displaySHA256:

   push {r1-r3,lr}                @ save  registres
   mov r3,r0
   mov r2,#0

1:

   ldr r0,[r3,r2,lsl #2]          @ load 4 bytes
   @rev r0,r0                      @ reverse bytes
   ldr r1,iAdrsZoneConv
   bl conversion16                @ conversion hexa
   mov r4,#0
   strb r4,[r1,r0]                @ 0 final
   ldr r0,iAdrsZoneConv
   bl affichageMess
   add r2,r2,#1
   cmp r2,#LGHASH / 4
   blt 1b                         @ and loop
   ldr r0,iAdrszCarriageReturn
   bl affichageMess               @ display message

100:

   pop {r1-r3,lr}                 @ restaur registers
   bx lr                          @ return  

iAdrsZoneConv: .int sZoneConv

/***************************************************/ /* ROUTINES INCLUDE */ /***************************************************/ .include "../affichage.inc" </lang>

Start hash number : 22
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

C

Library: GLib

<lang c>#include <glib.h>

  1. include <stdlib.h>
  2. include <stdio.h>
  3. include <string.h>

guchar* sha256_merkle_tree(FILE* in, size_t block_size) {

   gchar* buffer = g_malloc(block_size);
   GPtrArray* hashes = g_ptr_array_new_with_free_func(g_free);
   gssize digest_length = g_checksum_type_get_length(G_CHECKSUM_SHA256);
   GChecksum* checksum = g_checksum_new(G_CHECKSUM_SHA256);
   size_t bytes;
   while ((bytes = fread(buffer, 1, block_size, in)) > 0) {
       g_checksum_reset(checksum);
       g_checksum_update(checksum, (guchar*)buffer, bytes);
       gsize len = digest_length;
       guchar* digest = g_malloc(len);
       g_checksum_get_digest(checksum, digest, &len);
       g_ptr_array_add(hashes, digest);
   }
   g_free(buffer);
   guint hashes_length = hashes->len;
   if (hashes_length == 0) {
       g_ptr_array_free(hashes, TRUE);
       g_checksum_free(checksum);
       return NULL;
   }
   while (hashes_length > 1) {
       guint j = 0;
       for (guint i = 0; i < hashes_length; i += 2, ++j) {
           guchar* digest1 = g_ptr_array_index(hashes, i);
           guchar* digest_out = g_ptr_array_index(hashes, j);
           if (i + 1 < hashes_length) {
               guchar* digest2 = g_ptr_array_index(hashes, i + 1);
               g_checksum_reset(checksum);
               g_checksum_update(checksum, digest1, digest_length);
               g_checksum_update(checksum, digest2, digest_length);
               gsize len = digest_length;
               g_checksum_get_digest(checksum, digest_out, &len);
           } else {
               memcpy(digest_out, digest1, digest_length);
           }
       }
       hashes_length = j;
   }
   guchar* result = g_ptr_array_steal_index(hashes, 0);
   g_ptr_array_free(hashes, TRUE);
   g_checksum_free(checksum);
   return result;

}

int main(int argc, char** argv) {

   if (argc != 2) {
       fprintf(stderr, "usage: %s filename\n", argv[0]);
       return EXIT_FAILURE;
   }
   FILE* in = fopen(argv[1], "rb");
   if (in) {
       guchar* digest = sha256_merkle_tree(in, 1024);
       fclose(in);
       if (digest) {
           gssize length = g_checksum_type_get_length(G_CHECKSUM_SHA256);
           for (gssize i = 0; i < length; ++i)
               printf("%02x", digest[i]);
           printf("\n");
           g_free(digest);
       }
   } else {
       perror(argv[1]);
       return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

}</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

C++

Library: OpenSSL

<lang cpp>#include <cstdlib>

  1. include <fstream>
  2. include <iomanip>
  3. include <iostream>
  4. include <sstream>
  5. include <vector>
  6. include <openssl/sha.h>

class sha256_exception : public std::exception { public:

   const char* what() const noexcept override {
       return "SHA-256 error";
   }

};

class sha256 { public:

   sha256() { reset(); }
   sha256(const sha256&) = delete;
   sha256& operator=(const sha256&) = delete;
   void reset() {
       if (SHA256_Init(&context_) == 0)
           throw sha256_exception();
   }
   void update(const void* data, size_t length) {
       if (SHA256_Update(&context_, data, length) == 0)
           throw sha256_exception();
   }
   std::vector<unsigned char> digest() {
       std::vector<unsigned char> digest(SHA256_DIGEST_LENGTH);
       if (SHA256_Final(digest.data(), &context_) == 0)
           throw sha256_exception();
       return digest;
   }

private:

   SHA256_CTX context_;

};

std::string digest_to_string(const std::vector<unsigned char>& digest) {

   std::ostringstream out;
   out << std::hex << std::setfill('0');
   for (size_t i = 0; i < digest.size(); ++i)
       out << std::setw(2) << static_cast<int>(digest[i]);
   return out.str();

}

std::vector<unsigned char> sha256_merkle_tree(std::istream& in, size_t block_size) {

   std::vector<std::vector<unsigned char>> hashes;
   std::vector<char> buffer(block_size);
   sha256 md;
   while (in) {
       in.read(buffer.data(), block_size);
       size_t bytes = in.gcount();
       if (bytes == 0)
           break;
       md.reset();
       md.update(buffer.data(), bytes);
       hashes.push_back(md.digest());
   }
   if (hashes.empty())
       return {};
   size_t length = hashes.size();
   while (length > 1) {
       size_t j = 0;
       for (size_t i = 0; i < length; i += 2, ++j) {
           auto& digest1 = hashes[i];
           auto& digest_out = hashes[j];
           if (i + 1 < length) {
               auto& digest2 = hashes[i + 1];
               md.reset();
               md.update(digest1.data(), digest1.size());
               md.update(digest2.data(), digest2.size());
               digest_out = md.digest();
           } else {
               digest_out = digest1;
           }
       }
       length = j;
   }
   return hashes[0];

}

int main(int argc, char** argv) {

   if (argc != 2) {
       std::cerr << "usage: " << argv[0] << " filename\n";
       return EXIT_FAILURE;
   }
   std::ifstream in(argv[1], std::ios::binary);
   if (!in) {
       std::cerr << "Cannot open file " << argv[1] << ".\n";
       return EXIT_FAILURE;
   }
   try {
       std::cout << digest_to_string(sha256_merkle_tree(in, 1024)) << '\n';
   } catch (const std::exception& ex) {
       std::cerr << ex.what() << "\n";
       return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

}</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Delphi

Library: DCPsha256
Translation of: Go

<lang Delphi> program SHA256_Merkle_tree;

{$APPTYPE CONSOLE}

uses

 System.SysUtils,
 System.Classes,
 DCPsha256;

function SHA256(const Input: TArray<Byte>; Len: Integer = -1): TArray<Byte>; var

 Hasher: TDCP_sha256;
 l: Integer;

begin

 if Len < 0 then
   l := length(Input)
 else
   l := Len;
 Hasher := TDCP_sha256.Create(nil);
 try
   Hasher.Init;
   Hasher.Update(Input[0], l);
   SetLength(Result, Hasher.HashSize div 8);
   Hasher.final(Result[0]);
 finally
   Hasher.Free;
 end;

end;

function Merkle_tree(FileName: TFileName): string; const

 blockSize = 1024;

var

 f: TMemoryStream;
 hashes: TArray<TArray<byte>>;
 bytesRead: Cardinal;
 buffer: TArray<byte>;
 i, index: Integer;

begin

 Result := ;
 if not FileExists(FileName) then
   exit;
 SetLength(buffer, blockSize);
 FillChar(buffer[0], blockSize, 0);
 f := TMemoryStream.Create;
 f.LoadFromFile(FileName);
 index := 0;
 repeat
   bytesRead := f.Read(buffer, blockSize);
   if 0 = bytesRead then
     Break;
   Insert(SHA256(buffer, bytesRead), hashes, index);
   inc(index);
 until false;
 f.Free;
 SetLength(buffer, 64);
 while Length(hashes) > 1 do
 begin
   var hashes2: TArray<TArray<byte>>;
   index := 0;
   i := 0;
   while i < length(hashes) do
   begin
     if i < length(hashes) - 1 then
     begin
       buffer := copy(hashes[i], 0, length(hashes[i]));
       buffer := concat(buffer, copy(hashes[i + 1], 0, length(hashes[i])));
       Insert(SHA256(buffer), hashes2, index);
       inc(index);
     end
     else
     begin
       Insert(hashes[i], hashes2, index);
       inc(index);
     end;
     inc(i, 2);
   end;
   hashes := hashes2;
 end;
 Result := ;
 for var b in hashes[0] do
 begin
   Result := Result + b.ToHexString(2);
 end;

end;

begin

 writeln(Merkle_tree('title.png'));
 readln;

end.</lang>

Output:
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

Factor

Works with: Factor version 0.99 2020-08-14

<lang factor>USING: checksums checksums.sha fry grouping io io.encodings.binary io.files kernel make math math.parser namespaces sequences ;

each-block ( ... size quot: ( ... block -- ... ) -- ... )
   input-stream get spin (each-stream-block) ; inline
>sha-256 ( seq -- newseq ) sha-256 checksum-bytes ;
(hash-read) ( path encoding chunk-size -- )
   '[ _ [ >sha-256 , ] each-block ] with-file-reader ;

! Read a file in chunks as a sequence of sha-256 hashes, so as ! not to store a potentially large file in memory all at once.

hash-read ( path chunk-size -- seq )
   binary swap [ (hash-read) ] { } make ;
hash-combine ( seq -- newseq )
   2 <groups>
   [ dup length 1 > [ concat >sha-256 ] [ first ] if ] map ;
merkle-hash ( path chunk-size -- str )
   hash-read [ dup length 1 = ] [ hash-combine ] until first
   bytes>hex-string ;

"title.png" 1024 merkle-hash print</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Go

<lang go>package main

import (

   "crypto/sha256"
   "fmt"
   "io"
   "log"
   "os"

)

func main() {

   const blockSize = 1024
   f, err := os.Open("title.png")
   if err != nil {
       log.Fatal(err)
   }
   defer f.Close()
   var hashes [][]byte
   buffer := make([]byte, blockSize)
   h := sha256.New()
   for {
       bytesRead, err := f.Read(buffer)
       if err != nil {
           if err != io.EOF {
               log.Fatal(err)
           }
           break
       }
       h.Reset()
       h.Write(buffer[:bytesRead])
       hashes = append(hashes, h.Sum(nil))
   }
   buffer = make([]byte, 64)
   for len(hashes) > 1 {
       var hashes2 [][]byte
       for i := 0; i < len(hashes); i += 2 {
           if i < len(hashes)-1 {                
               copy(buffer, hashes[i])
               copy(buffer[32:], hashes[i+1])
               h.Reset()
               h.Write(buffer)
               hashes2 = append(hashes2, h.Sum(nil))
           } else {
               hashes2 = append(hashes2, hashes[i])
           }
       }
       hashes = hashes2
   }
   fmt.Printf("%x", hashes[0])
   fmt.Println()

}</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Java

<lang java>import java.io.*; import java.security.*; import java.util.*;

public class SHA256MerkleTree {

   public static void main(String[] args) {
       if (args.length != 1) {
           System.err.println("missing file argument");
           System.exit(1);
       }
       try (InputStream in = new BufferedInputStream(new FileInputStream(args[0]))) {
           byte[] digest = sha256MerkleTree(in, 1024);
           if (digest != null)
               System.out.println(digestToString(digest));
       } catch (Exception e) {
           e.printStackTrace();
       }
   }
   private static String digestToString(byte[] digest) {
       StringBuilder result = new StringBuilder();
       for (int i = 0; i < digest.length; ++i)
           result.append(String.format("%02x", digest[i]));
       return result.toString();
   }
   private static byte[] sha256MerkleTree(InputStream in, int blockSize) throws Exception {
       byte[] buffer = new byte[blockSize];
       int bytes;
       MessageDigest md = MessageDigest.getInstance("SHA-256");
       List<byte[]> digests = new ArrayList<>();
       while ((bytes = in.read(buffer)) > 0) {
           md.reset();
           md.update(buffer, 0, bytes);
           digests.add(md.digest());
       }
       int length = digests.size();
       if (length == 0)
           return null;
       while (length > 1) {
           int j = 0;
           for (int i = 0; i < length; i += 2, ++j) {
               byte[] digest1 = digests.get(i);
               if (i + 1 < length) {
                   byte[] digest2 = digests.get(i + 1);
                   md.reset();
                   md.update(digest1);
                   md.update(digest2);
                   digests.set(j, md.digest());
               } else {
                   digests.set(j, digest1);
               }
           }
           length = j;
       }
       return digests.get(0);
   }

}</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Julia

<lang julia>using SHA

function merkletree(filename="title.png", blocksize=1024)

   bytes = codeunits(read(filename, String))
   len = length(bytes)
   hsh = [sha256(view(bytes. i:min(i+blocksize-1, len)])) for i in 1:1024:len]
   len = length(hsh)
   while len > 1
       hsh = [i == len ? hsh[i] : sha256(vcat(hsh[i], hsh[i + 1])) for i in 1:2:len]
       len = length(hsh)
   end
   return bytes2hex(hsh[1])

end

println(merkletree())

</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Mathematica/Wolfram Language

<lang Mathematica>data=Import["https://rosettacode.org/mw/title.png","Byte"]; parts=Hash[ByteArray[#],"SHA256","ByteArray"]&/@Partition[data,UpTo[1024]]; parts=NestWhile[If[Length[#]==2,Hash[Join@@#,"SHA256","ByteArray"],First[#]]&/@Partition[#,UpTo[2]]&,parts,Length[#]>1&]; StringJoin[IntegerString[Normal[First[parts]],16]]</lang>

Output:
a4f92cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Nim

Library: nimcrypto

To compute the digests of file blocks, we use the procedure “digest” which accepts the address of a byte array and a byte count. To compute the digests of pairs of digests, we use instead a SHA256 context and the procedures “update” and “finish”, which avoids a copy in an intermediate buffer. <lang Nim> import nimcrypto

const BlockSize = 1024

var hashes: seq[MDigest[256]]

let f = open("title.png") var buffer: array[BlockSize, byte] while true:

 let n = f.readBytes(buffer, 0, BlockSize)
 if n == 0: break
 hashes.add sha256.digest(buffer[0].addr, n.uint)

f.close()

var ctx: sha256 while hashes.len != 1:

 var newHashes: seq[MDigest[256]]
 for i in countup(0, hashes.high, 2):
   if i < hashes.high:
     ctx.init()
     ctx.update(hashes[i].data)
     ctx.update(hashes[i + 1].data)
     newHashes.add ctx.finish()
     ctx.clear()
   else:
     newHashes.add hashes[i]
 hashes= newHashes

echo hashes[0]</lang>

Output:
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

Pascal

Free Pascal

minimal modified Delphi version <lang pascal> program SHA256_Merkle_tree; {$IFDEF WINDOWS}

 {$APPTYPE CONSOLE}

{$ENDIF} {$IFDEF DELPHI} uses

 System.SysUtils,
 System.Classes,
 DCPsha256;

type

 TmyByte = TArray<Byte>;  
 TmyHashes = TArray<TArray<byte>>;

{$ENDIF} {$IFDEF FPC}

 {$Mode DELPHI}

uses

 SysUtils,
 Classes,
 DCPsha256;

type

 TmyByte = array of byte;
 TmyHashes = array of TmyByte;

{$ENDIF}

function SHA256(const Input: TmyByte; Len: Integer = -1): TmyByte; var

 Hasher: TDCP_sha256;
 l: Integer;

begin

 if Len < 0 then
   l := length(Input)
 else
   l := Len;
 Hasher := TDCP_sha256.Create(nil);
 try
   Hasher.Init;
   Hasher.Update(Input[0], l);
   SetLength(Result, Hasher.HashSize div 8);
   Hasher.final(Result[0]);
 finally
   Hasher.Free;
 end;

end;

function Merkle_tree(FileName: TFileName): string; const

 blockSize = 1024;

var

 f: TMemoryStream;
 hashes,
 hashes2: TmyHashes;
 bytesRead: Cardinal;
 buffer: TmyByte;
 i, index: Integer;
 b: byte;

begin

 Result := ;
 if not FileExists(FileName) then
   exit;

 SetLength(buffer, blockSize);
 FillChar(buffer[0], blockSize, #0);
 f := TMemoryStream.Create;
 f.LoadFromFile(FileName);
 index := 0;
 repeat
   //freepascal needs buffer[0] instead buffer
   bytesRead := f.Read(buffer[0], blockSize);
   if bytesRead= 0 then
     BREAK;
   Insert(SHA256(buffer, bytesRead), hashes, index);
   inc(index);
 until bytesRead<blockSize;
 f.Free;
 SetLength(buffer, 64);
 while Length(hashes) > 1 do
 begin
   //first clear old hashes2
   setlength(hashes2,0);  
   index := 0;
   i := 0;
   while i < length(hashes) do
   begin
     if i < length(hashes) - 1 then
     begin
       buffer := copy(hashes[i], 0, length(hashes[i]));
       buffer := concat(buffer,copy(hashes[i + 1], 0, length(hashes[i])));
       Insert(SHA256(buffer), hashes2, index);
       inc(index);
     end
     else
     begin
       Insert(hashes[i], hashes2, index);
       inc(index);
     end;
     inc(i, 2);
   end;
   hashes := hashes2;
 end;

 Result := ;
 for b in hashes[0] do
 begin
   Result := Result + b.ToHexString(2);
 end;

end;

begin

 writeln(Merkle_tree('title.png'));

{$IFDEF WINDOWS}

 readln;

{$ENDIF} end.</lang>

Output:
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

Perl

Translation of: Raku

<lang perl># 20210222 Perl programming solution

use strict; use warnings;

use Crypt::Digest::SHA256 'sha256' ;

my @blocks;

open my $fh, '<:raw', './title.png';

while ( read $fh, my $chunk, 1024 ) { push @blocks, sha256 $chunk }

while ( scalar @blocks > 1 ) {

  my @clone = @blocks and @blocks = ();
  while ( @_ = splice @clone, 0, 2 ) {
     push @blocks, scalar @_ == 1 ? $_[0] : sha256 $_[0].$_[1]
  }

}

print unpack ( 'H*', $blocks[0] ) , "\n";</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Phix

Library: Phix/libcurl
without javascript_semantics
include builtins\libcurl.e
include builtins\sha256.e
 
constant ONE_MB = 1024 * 1024
 
function merkle(string filename, url, integer block_size=ONE_MB)
    if not file_exists(filename) then
        printf(1,"Downloading %s...\n",{filename})
        CURLcode res = curl_easy_get_file(url,"",filename) -- (no proxy)
        if res!=CURLE_OK then
            string error = sprintf("%d",res)
            if res=CURLE_COULDNT_RESOLVE_HOST then
                error &= " [CURLE_COULDNT_RESOLVE_HOST]"
            end if
            crash("Error %s downloading file\n", {error})
        end if  
    end if  
    string data = get_text(filename)
    sequence blocks = {}
    for i=1 to length(data) by block_size do
        blocks = append(blocks,sha256(data[i..min(i+block_size-1,length(data))]))
    end for
    while length(blocks)>1 do
        integer l = 0
        for i=1 to length(blocks) by 2 do
            l += 1
            blocks[l] = iff(i<length(blocks)?sha256(blocks[i]&blocks[i+1])
                                            :blocks[i])
        end for
        blocks = blocks[1..l]
    end while
    return blocks[1]            
end function
 
function asHex(string s)
string res = ""
    for i=1 to length(s) do
        res &= sprintf("%02X",s[i])
    end for
    return res
end function
 
printf(1,"%s\n",asHex(merkle("title.png", "https://rosettacode.org/mw/title.png", 1024)))
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Python

This version attempts to combine blocks as soon as possible to minimize the memory footprint.

<lang Python>#!/usr/bin/env python

  1. compute the root label for a SHA256 Merkle tree built on blocks of a given
  2. size (default 1MB) taken from the given file(s)

import argh import hashlib import sys

@argh.arg('filename', nargs='?', default=None) def main(filename, block_size=1024*1024):

   if filename:
       fin = open(filename, 'rb')
   else: 
       fin = sys.stdin
   
   stack = []
   block = fin.read(block_size)
   while block:
       # a node is a pair: ( tree-level, hash )
       node = (0, hashlib.sha256(block).digest())
       stack.append(node)
       # concatenate adjacent pairs at the same level
       while len(stack) >= 2 and stack[-2][0] == stack[-1][0]:
           a = stack[-2]
           b = stack[-1]
           l = a[0]
           stack[-2:] = [(l+1, hashlib.sha256(a[1] + b[1]).digest())]
       block = fin.read(block_size)
   
   while len(stack) > 1:
       # at the end we have to concatenate even across levels
       a = stack[-2]
       b = stack[-1]
       al = a[0]
       bl = b[0]
       stack[-2:] = [(max(al, bl)+1, hashlib.sha256(a[1] + b[1]).digest())]
   print(stack[0][1].hex())


argh.dispatch_command(main) </lang>

Output:
$ sha256tree.py --block-size=1024 title.png
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Raku

<lang perl6>use Digest::SHA256::Native;

unit sub MAIN(Int :b(:$block-size) = 1024 × 1024, *@args);

my $in = @args ?? IO::CatHandle.new(@args) !! $*IN;

my @blocks = do while my $block = $in.read: $block-size { sha256 $block };

while @blocks > 1 {

 @blocks = @blocks.batch(2).map: { $_ > 1 ?? sha256([~] $_) !! .[0] }

}

say @blocks[0]».fmt('%02x').join;</lang>

Output:
$ sha256tree --block-size=1024 title.png
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Rust

<lang rust>extern crate crypto;

use crypto::digest::Digest; use crypto::sha2::Sha256; use std::fs::File; use std::io::prelude::*; use std::io::BufReader;

fn sha256_merkle_tree(filename: &str, block_size: usize) -> std::io::Result<Option<Vec<u8>>> {

   let mut md = Sha256::new();
   let mut input = BufReader::new(File::open(filename)?);
   let mut buffer = vec![0; block_size];
   let mut digest = vec![0; md.output_bytes()];
   let mut digests = Vec::new();
   loop {
       let bytes = input.read(&mut buffer)?;
       if bytes == 0 {
           break;
       }
       md.reset();
       md.input(&buffer[0..bytes]);
       md.result(&mut digest);
       digests.push(digest.clone());
   }
   let mut len = digests.len();
   if len == 0 {
       return Ok(None);
   }
   while len > 1 {
       let mut j = 0;
       let mut i = 0;
       while i < len {
           if i + 1 < len {
               md.reset();
               md.input(&digests[i]);
               md.input(&digests[i + 1]);
               md.result(&mut digests[j]);
           } else {
               digests.swap(i, j);
           }
           i += 2;
           j += 1;
       }
       len = j;
   }
   Ok(Some(digests[0].clone()))

}

fn digest_to_string(digest: &[u8]) -> String {

   let mut result = String::new();
   for x in digest {
       result.push_str(&format!("{:02x}", x));
   }
   result

}

fn main() {

   match sha256_merkle_tree("title.png", 1024) {
       Ok(Some(digest)) => println!("{}", digest_to_string(&digest)),
       Ok(None) => {}
       Err(error) => eprintln!("I/O error: {}", error),
   }

}</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Wren

Library: Wren-crypto
Library: Wren-seq
Library: Wren-str
Library: Wren-fmt

<lang ecmascript>import "io" for File import "/crypto" for Sha256, Bytes import "/seq" for Lst import "/str" for Str import "/fmt" for Conv

var bytes = File.read("title.png").bytes.toList var chunks = Lst.chunks(bytes, 1024) var hashes = List.filled(chunks.count, null) var i = 0 for (chunk in chunks) {

  var h = Sha256.digest(chunk.map { |b| String.fromByte(b) }.join())
  hashes[i] = Str.chunks(h, 2).map { |x| Conv.atoi(x, 16) }.toList
  i = i + 1

}

var buffer = List.filled(64, 0) while (hashes.count > 1) {

   var hashes2 = []
   var i = 0
   while (i < hashes.count) {
       if (i < hashes.count - 1) {
           for (j in  0..31) buffer[j] = hashes[i][j]
           for (j in  0..31) buffer[j+32] = hashes[i+1][j]
           var h = Sha256.digest(buffer.map { |b| String.fromByte(b) }.join())
           var hb = Str.chunks(h, 2).map { |x| Conv.atoi(x, 16) }.toList
           hashes2.add(hb)
       } else {
           hashes2.add(hashes[i])
       }
       i = i + 2
   }
   hashes = hashes2

} System.print(Bytes.toHexString(hashes[0]))</lang>

Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c