Hex dump: Difference between revisions
(New draft task with Python example.) |
(Added FreeBASIC) |
||
Line 67: | Line 67: | ||
=={{header| |
=={{header|FreeBASIC}}== |
||
Code adapted from https://github.com/jlaasonen/hexfile |
|||
The code is from Jussi Laasonen (https://jlaasonen.me) |
|||
<syntaxhighlight lang="vb">'Usage: hexfile [-i <first index>] <file> |
|||
Const bytesPerLine = 16 |
|||
Const tabWidth = 3 |
|||
Const fileIndexWidth = 8 |
|||
Const firstIndex = 1 |
|||
Const asciiLowerBound = 31 |
|||
Const asciiUpperBound = 127 |
|||
Const emptyByte = " " |
|||
Const emptyAsciiByte = " " |
|||
Const nonAsciiByte = "." |
|||
Enum Keys Explicit |
|||
Up = &H48FF |
|||
PageUp = &H49FF |
|||
Down = &H50FF |
|||
PageDown = &H51FF |
|||
Home = &H47FF |
|||
End_ = &H4FFF |
|||
Esc = 27 |
|||
End Enum |
|||
Sub GetLine(Byval fileIndex As Longint, Byval fileNumber As Integer, bytes() As Ubyte) |
|||
Dim bytesread As Uinteger |
|||
If Get(#fileNumber,fileIndex,bytes(), ,bytesread) = 0 And bytesread > 0 Then |
|||
Redim Preserve bytes(bytesread) |
|||
Else |
|||
Erase bytes |
|||
End If |
|||
End Sub |
|||
Function ByteToAscii(Byval byte_ As Ubyte) As String |
|||
Return Iif(byte_ > asciiLowerBound And byte_ < asciiUpperBound, Chr(byte_), nonAsciiByte) |
|||
End Function |
|||
Function MakeLine(Byval firstIndexDisplay As Longint, Byval fileIndex As Longint, bytes() As Ubyte) As String |
|||
If Ubound(bytes) < Lbound(bytes) Then Return "" End If |
|||
Dim hexBytes As String = "" |
|||
Dim asciiBytes As String = "" |
|||
For byteIndex As Integer = Lbound(bytes) To Lbound(bytes) + bytesPerLine - 1 |
|||
If byteIndex <= Ubound(bytes) Then |
|||
Dim byte_ As Ubyte = bytes(byteIndex) |
|||
asciiBytes += ByteToAscii(byte_) |
|||
hexBytes += Hex(byte_, 2) |
|||
Else |
|||
asciiBytes += emptyAsciiByte |
|||
hexBytes += emptyByte |
|||
End If |
|||
hexBytes += Iif(byteIndex = (bytesPerLine\2)-1, " ", " ") |
|||
Next |
|||
Dim displayIndex As Const Longint = fileIndex - firstIndex + firstIndexDisplay |
|||
Return Space(1) + Hex(displayIndex,fileIndexWidth) + Space(tabWidth) +_ |
|||
hexBytes + Space(tabWidth-1) + asciiBytes |
|||
End Function |
|||
Sub DumpInteractive(Byval fileNumber As Integer, Byval firstIndexDisplay As Longint) |
|||
Dim consoleDimensions As Integer = Width() |
|||
Dim linesPerPage As Integer = Hiword(consoleDimensions) |
|||
Dim bytesPerPage As Integer = bytesPerline * linesPerPage |
|||
Dim numberOfFullLines As Longint = (Lof(fileNumber)-1) \ bytesPerLine |
|||
Dim lastLineIndex As Longint = numberOfFullLines * bytesPerLine + firstIndex |
|||
Dim lastPageIndex As Longint = lastLineIndex - bytesPerPage + bytesPerLine |
|||
If lastPageIndex < firstIndex Then lastPageIndex = firstIndex End If |
|||
Cls |
|||
Dim input_ As Long |
|||
Dim fileIndex As Longint = firstIndex |
|||
Do |
|||
Locate 1,1,0 |
|||
For lineNumber As Integer = 1 To linesPerPage |
|||
Dim lineIndex As Longint = fileIndex + (lineNumber-1)*bytesPerLine |
|||
Redim bytes(bytesPerLine) As Ubyte |
|||
GetLine(lineIndex, fileNumber, bytes()) |
|||
Dim lineText As String = MakeLine(firstIndexDisplay, lineIndex, bytes()) |
|||
If lineNumber = linesPerPage Then |
|||
Print lineText; |
|||
Else |
|||
Print lineText |
|||
End If |
|||
Next |
|||
input_ = Getkey |
|||
Select Case As Const input_ |
|||
Case Keys.PageUp |
|||
fileIndex -= bytesPerPage |
|||
Case Keys.Up |
|||
fileIndex -= bytesPerLine |
|||
Case Keys.PageDown |
|||
fileIndex += bytesPerPage |
|||
Case Keys.Down |
|||
fileIndex += bytesPerLine |
|||
Case Keys.Home |
|||
fileIndex = firstIndex |
|||
Case Keys.End_ |
|||
fileIndex = lastPageIndex |
|||
End Select |
|||
If fileIndex < firstIndex Then fileIndex = firstIndex |
|||
If fileIndex > lastPageIndex Then fileIndex = lastPageIndex |
|||
Loop Until input_ = Keys.Esc |
|||
End Sub |
|||
Sub PrintUsage() |
|||
Const firstColumn = 4 |
|||
Const secondColumn = 12 |
|||
Dim controls(0 To ..., 2) As String = {_ |
|||
{"Down", "Scroll down one line."},_ |
|||
{"Up", "Scroll up one line."},_ |
|||
{"PgDown", "Show next page."},_ |
|||
{"PgUp", "Show previos page."},_ |
|||
{"End", "Jump to the end of the file."},_ |
|||
{"Home", "Jump to the beginning of the file."},_ |
|||
{"Esc", "Quit."}_ |
|||
} |
|||
Print "hexfile - a simple commandline hexdumper" |
|||
Print " (c) 2021-2023 Jussi Laasonen. Licensed under the MIT license." |
|||
Print " See https://github.com/jlaasonen/hexfile for full license." |
|||
Print !"\nUsage: hexfile [-i <first index>] <file>" |
|||
Print !"\nDisplays an interactive hex dump the file." |
|||
Print !"\nControls:" |
|||
For row As Integer = Lbound(controls) To Ubound(controls) |
|||
Print Tab(firstColumn);controls(row,0);Tab(secondColumn);controls(row,1) |
|||
Next |
|||
Print !"\nOptions:" |
|||
Print " -i <first index>" |
|||
Print " Starts the displayed file index from the given index. Defaults to 0." |
|||
Print " The value is expected in decimal format. For binary, octal or hexadecimal" |
|||
Print " Use prefix &B, &O or &H respectively." |
|||
End Sub |
|||
Const fileIndexArgument = "-i" |
|||
Dim fileName As String = "" |
|||
Dim firstIndexDisplay As Longint = 0 |
|||
If __fb_argc__ = 2 Then |
|||
fileName = Command(1) |
|||
Elseif __fb_argc__ = 4 And Command(1) = fileIndexArgument Then |
|||
firstIndexDisplay = Vallng(Command(2)) |
|||
fileName = Command(3) |
|||
End If |
|||
Dim fileNumber As Long = Freefile |
|||
If fileName = "" Then |
|||
PrintUsage() |
|||
Elseif Open(fileName For Binary Access Read As #fileNumber) = 0 Then |
|||
DumpInteractive(fileNumber, firstIndexDisplay) |
|||
Close(fileNumber) |
|||
Else |
|||
Print "Failed to open file '" + fileName + "'." |
|||
End If</syntaxhighlight> |
|||
[https://jlaasonen.files.wordpress.com/2023/09/hexfile-1.3.1-windows-terminal-screenshot.png hexfile Windows terminal] |
|||
=={{header|Python}}== |
|||
<syntaxhighlight lang="python"> |
<syntaxhighlight lang="python"> |
||
"""Display bytes in a file like hexdump or xxd.""" |
"""Display bytes in a file like hexdump or xxd.""" |
||
Line 233: | Line 404: | ||
{{out}} |
{{out}} |
||
<pre>$ python hex_dump.py example_utf16.txt</pre> |
|||
<pre> |
|||
$ python hex_dump.py example_utf16.txt |
|||
</pre> |
|||
<pre> |
<pre> |
||
00000000 ff fe 52 00 6f 00 73 00 65 00 74 00 74 00 61 00 |..R.o.s.e.t.t.a.| |
00000000 ff fe 52 00 6f 00 73 00 65 00 74 00 74 00 61 00 |..R.o.s.e.t.t.a.| |
||
Line 247: | Line 415: | ||
00000068 |
00000068 |
||
</pre> |
</pre> |
||
<pre>$ python hex_dump.py example_utf16.txt -b</pre> |
|||
<pre> |
|||
$ python hex_dump.py example_utf16.txt -b |
|||
</pre> |
|||
<pre> |
<pre> |
||
00000000 11111111 11111110 01010010 00000000 01101111 00000000 |..R.o.| |
00000000 11111111 11111110 01010010 00000000 01101111 00000000 |..R.o.| |
Revision as of 17:31, 29 October 2023
A hex dump is a textual representation of bytes in a file.
hexdump is a command-line tool that can dump bytes from a file in a variety of formats, including hexadecimal, octal and ASCII.
hexdump's canonical format displays, on each line:
- a byte offset in hexadecimal,
- up to 16 bytes in hexadecimal separated by spaces, with an extra space between the 8th and 9th byte,
- the same 16 bytes interpreted as ASCII characters, with non-printing and non-ASCII characters replaced with a dot (`.`), surrounded by pipes (`|`).
The last line shows a final byte count.
For example, the string "Rosetta Code is a programming chrestomathy site 😀." encoded in UTF-16 (little-endian - the first two bytes are the byte order mark), displayed in the canonical format is:
00000000  ff fe 52 00 6f 00 73 00  65 00 74 00 74 00 61 00  |..R.o.s.e.t.t.a.|
00000010  20 00 43 00 6f 00 64 00  65 00 20 00 69 00 73 00  | .C.o.d.e. .i.s.|
00000020  20 00 61 00 20 00 70 00  72 00 6f 00 67 00 72 00  | .a. .p.r.o.g.r.|
00000030  61 00 6d 00 6d 00 69 00  6e 00 67 00 20 00 63 00  |a.m.m.i.n.g. .c.|
00000040  68 00 72 00 65 00 73 00  74 00 6f 00 6d 00 61 00  |h.r.e.s.t.o.m.a.|
00000050  74 00 68 00 79 00 20 00  73 00 69 00 74 00 65 00  |t.h.y. .s.i.t.e.|
00000060  20 00 3d d8 00 de 2e 00                           | .=.....|
00000068
- Task
Implement a hexdump-like program that:
- outputs in the canonical format,
- takes an optional offset in bytes from which to start,
- takes an optional length in bytes after which it will stop.
Demonstrate your implementation by showing the canonical hex dump of the example above, plus any other examples you find useful.
- Stretch
xxd is another command-line tool similar to hexdump. It offers a binary mode where bytes are displayed in bits instead of hexadecimal.
Implement a binary mode. For this task, in binary mode, the example above should be displayed like this:
00000000 11111111 11111110 01010010 00000000 01101111 00000000 |..R.o.|
00000006 01110011 00000000 01100101 00000000 01110100 00000000 |s.e.t.|
0000000c 01110100 00000000 01100001 00000000 00100000 00000000 |t.a. .|
00000012 01000011 00000000 01101111 00000000 01100100 00000000 |C.o.d.|
00000018 01100101 00000000 00100000 00000000 01101001 00000000 |e. .i.|
0000001e 01110011 00000000 00100000 00000000 01100001 00000000 |s. .a.|
00000024 00100000 00000000 01110000 00000000 01110010 00000000 | .p.r.|
0000002a 01101111 00000000 01100111 00000000 01110010 00000000 |o.g.r.|
00000030 01100001 00000000 01101101 00000000 01101101 00000000 |a.m.m.|
00000036 01101001 00000000 01101110 00000000 01100111 00000000 |i.n.g.|
0000003c 00100000 00000000 01100011 00000000 01101000 00000000 | .c.h.|
00000042 01110010 00000000 01100101 00000000 01110011 00000000 |r.e.s.|
00000048 01110100 00000000 01101111 00000000 01101101 00000000 |t.o.m.|
0000004e 01100001 00000000 01110100 00000000 01101000 00000000 |a.t.h.|
00000054 01111001 00000000 00100000 00000000 01110011 00000000 |y. .s.|
0000005a 01101001 00000000 01110100 00000000 01100101 00000000 |i.t.e.|
00000060 00100000 00000000 00111101 11011000 00000000 11011110 | .=...|
00000066 00101110 00000000                                     |..|
00000068
Other hexdump/xxd features and a command line interface to your program are optional.
FreeBASIC
Code adapted from https://github.com/jlaasonen/hexfile
The code is from Jussi Laasonen (https://jlaasonen.me)
'Usage: hexfile [-i <first index>] <file>

' Layout of one dump line.
Const bytesPerLine = 16        ' bytes shown on each line
Const tabWidth = 3             ' spaces between the index, hex and ASCII columns
Const fileIndexWidth = 8       ' hex digits used for the displayed file index
Const firstIndex = 1           ' file position of the first byte, as passed to Get

' Exclusive bounds: only byte values strictly between them are printed as-is
' in the ASCII column; everything else is shown as nonAsciiByte.
Const asciiLowerBound = 31
Const asciiUpperBound = 127

' Placeholders for positions past the end of the data on a short line.
' A real byte is printed as two hex digits, so its placeholder has to be two
' characters wide or the ASCII column of a short last line shifts left.
Const emptyByte = "  "
Const emptyAsciiByte = " "
Const nonAsciiByte = "."
' Key codes that DumpInteractive compares against the value returned by Getkey.
' Listed in ascending numeric order.
Enum Keys Explicit
    Esc      = 27
    Home     = &H47FF
    Up       = &H48FF
    PageUp   = &H49FF
    End_     = &H4FFF
    Down     = &H50FF
    PageDown = &H51FF
End Enum
' Fills bytes() with data read from the open file, starting at fileIndex.
'
' fileIndex  - file position to read from (passed straight to Get).
' fileNumber - number of a file already opened for binary reading.
' bytes()    - in: a buffer whose size is the maximum number of bytes to read;
'              out: resized to hold exactly the bytes that were read, or
'              erased (empty) when nothing could be read.
Sub GetLine(Byval fileIndex As Longint, Byval fileNumber As Integer, bytes() As Ubyte)
    Dim bytesread As Uinteger

    If Get(#fileNumber, fileIndex, bytes(), , bytesread) = 0 And bytesread > 0 Then
        ' Array bounds are inclusive, so N bytes occupy Lbound .. Lbound + N - 1.
        ' Using N itself as the upper bound would keep one extra element and
        ' make a short last line display a byte that is not in the file.
        Dim firstByte As Integer = Lbound(bytes)
        Dim lastByte As Integer = firstByte + Cint(bytesread) - 1
        Redim Preserve bytes(firstByte To lastByte)
    Else
        ' Read error or end of file: hand back an empty array.
        Erase bytes
    End If
End Sub
' Returns the character to show for byte_ in the ASCII column: the byte's own
' character when its value lies strictly between asciiLowerBound and
' asciiUpperBound, otherwise the nonAsciiByte placeholder.
Function ByteToAscii(Byval byte_ As Ubyte) As String
    If byte_ <= asciiLowerBound Or byte_ >= asciiUpperBound Then
        Return nonAsciiByte
    End If

    Return Chr(byte_)
End Function
' Builds the text of one dump line: file index, hex column, ASCII column.
'
' firstIndexDisplay - index shown for the first byte of the file.
' fileIndex         - file position of the first byte in bytes().
' bytes()           - the bytes of this line; may hold fewer than
'                     bytesPerLine, in which case the rest is padded.
'
' Returns an empty string when bytes() is empty.
Function MakeLine(Byval firstIndexDisplay As Longint, Byval fileIndex As Longint, bytes() As Ubyte) As String
    If Ubound(bytes) < Lbound(bytes) Then
        Return ""
    End If

    Dim hexBytes As String = ""
    Dim asciiBytes As String = ""

    ' Work relative to the array's own lower bound so the mid-line gap lands
    ' after the 8th byte whatever base the caller's array uses.
    Dim firstByte As Const Integer = Lbound(bytes)
    Dim middleByte As Const Integer = firstByte + (bytesPerLine \ 2) - 1

    For byteIndex As Integer = firstByte To firstByte + bytesPerLine - 1
        If byteIndex <= Ubound(bytes) Then
            Dim byte_ As Ubyte = bytes(byteIndex)
            asciiBytes += ByteToAscii(byte_)
            hexBytes += Hex(byte_, 2)
        Else
            ' Past the end of the data: keep the columns aligned.
            asciiBytes += emptyAsciiByte
            hexBytes += emptyByte
        End If

        ' One space after every byte, two after the middle byte so the line
        ' is visibly split into two halves.
        hexBytes += Iif(byteIndex = middleByte, Space(2), Space(1))
    Next

    ' Translate the file position into the index the user asked to see.
    Dim displayIndex As Const Longint = fileIndex - firstIndex + firstIndexDisplay

    Return Space(1) + Hex(displayIndex, fileIndexWidth) + Space(tabWidth) + _
           hexBytes + Space(tabWidth - 1) + asciiBytes
End Function
' Shows the open file as a full-screen hex dump and lets the user scroll
' through it with the keys listed in Keys until Esc is pressed.
'
' fileNumber        - number of a file already opened for binary reading.
' firstIndexDisplay - index displayed for the first byte of the file.
Sub DumpInteractive(Byval fileNumber As Integer, Byval firstIndexDisplay As Longint)
    ' The high word of the value returned by Width() is used as the number
    ' of lines that fit on one page.
    Dim consoleDimensions As Integer = Width()
    Dim linesPerPage As Integer = Hiword(consoleDimensions)
    Dim bytesPerPage As Integer = bytesPerLine * linesPerPage

    ' File position of the last line, and of the top line of the page that
    ' shows that last line at the bottom. Scrolling never goes past it.
    Dim numberOfFullLines As Longint = (Lof(fileNumber) - 1) \ bytesPerLine
    Dim lastLineIndex As Longint = numberOfFullLines * bytesPerLine + firstIndex
    Dim lastPageIndex As Longint = lastLineIndex - bytesPerPage + bytesPerLine
    If lastPageIndex < firstIndex Then lastPageIndex = firstIndex

    Cls

    Dim input_ As Long
    Dim fileIndex As Longint = firstIndex

    Do
        ' Redraw the whole page from the top-left corner.
        Locate 1, 1, 0

        For lineNumber As Integer = 1 To linesPerPage
            Dim lineIndex As Longint = fileIndex + (lineNumber - 1) * bytesPerLine

            ' Exactly bytesPerLine elements (bounds are inclusive), so one
            ' read fetches one line's worth of bytes and no more.
            Redim bytes(0 To bytesPerLine - 1) As Ubyte
            GetLine(lineIndex, fileNumber, bytes())

            Dim lineText As String = MakeLine(firstIndexDisplay, lineIndex, bytes())

            ' No line break after the bottom line.
            If lineNumber = linesPerPage Then
                Print lineText;
            Else
                Print lineText
            End If
        Next

        input_ = Getkey

        Select Case As Const input_
        Case Keys.PageUp
            fileIndex -= bytesPerPage
        Case Keys.Up
            fileIndex -= bytesPerLine
        Case Keys.PageDown
            fileIndex += bytesPerPage
        Case Keys.Down
            fileIndex += bytesPerLine
        Case Keys.Home
            fileIndex = firstIndex
        Case Keys.End_
            fileIndex = lastPageIndex
        End Select

        ' Keep the top line inside the file.
        If fileIndex < firstIndex Then fileIndex = firstIndex
        If fileIndex > lastPageIndex Then fileIndex = lastPageIndex
    Loop Until input_ = Keys.Esc
End Sub
' Prints the program banner, command-line usage, the interactive key
' bindings and the available options.
Sub PrintUsage()
    Const firstColumn = 4
    Const secondColumn = 12

    ' One row per key: {key name, description}. The second dimension is
    ' declared as exactly two columns to match the initialisers.
    Dim controls(0 To ..., 0 To 1) As String = {_
        {"Down", "Scroll down one line."},_
        {"Up", "Scroll up one line."},_
        {"PgDown", "Show next page."},_
        {"PgUp", "Show previous page."},_
        {"End", "Jump to the end of the file."},_
        {"Home", "Jump to the beginning of the file."},_
        {"Esc", "Quit."}_
    }

    Print "hexfile - a simple commandline hexdumper"
    Print " (c) 2021-2023 Jussi Laasonen. Licensed under the MIT license."
    Print " See https://github.com/jlaasonen/hexfile for full license."
    Print !"\nUsage: hexfile [-i <first index>] <file>"
    Print !"\nDisplays an interactive hex dump of the file."

    Print !"\nControls:"
    For row As Integer = Lbound(controls) To Ubound(controls)
        Print Tab(firstColumn);controls(row,0);Tab(secondColumn);controls(row,1)
    Next

    Print !"\nOptions:"
    Print " -i <first index>"
    Print " Starts the displayed file index from the given index. Defaults to 0."
    Print " The value is expected in decimal format. For binary, octal or hexadecimal"
    Print " Use prefix &B, &O or &H respectively."
End Sub
' Program entry: parse the command line, then show either the usage text,
' the interactive dump, or an error when the file cannot be opened.
Const fileIndexArgument = "-i"

Dim fileName As String = ""
Dim firstIndexDisplay As Longint = 0

' Accepted forms: "<file>" or "-i <first index> <file>".
Select Case __fb_argc__
Case 2
    fileName = Command(1)
Case 4
    If Command(1) = fileIndexArgument Then
        firstIndexDisplay = Vallng(Command(2))
        fileName = Command(3)
    End If
End Select

Dim fileNumber As Long = Freefile

If Len(fileName) = 0 Then
    ' No usable file argument was given.
    PrintUsage()
Elseif Open(fileName For Binary Access Read As #fileNumber) <> 0 Then
    Print "Failed to open file '" + fileName + "'."
Else
    DumpInteractive(fileNumber, firstIndexDisplay)
    Close(fileNumber)
End If
Python
"""Display bytes in a file like hexdump or xxd."""
import abc
import math
from io import BufferedIOBase
from itertools import islice
from typing import Iterable
from typing import Iterator
from typing import Sequence
from typing import Tuple
from typing import TypeVar
# Number of bytes hex_dump() requests from the file in each read() call.
READ_SIZE = 2048
class Formatter(abc.ABC):
    """Base class for hex dump formatters."""

    @property
    @abc.abstractmethod
    def bytes_per_line(self) -> int:
        """Number of bytes this formatter renders on one line."""

    @abc.abstractmethod
    def __call__(self, data: Sequence[int]) -> str:
        """Return the text of one dump line for the byte values in _data_."""
class CanonicalFormatter(Formatter):
    """Format up to 16 bytes as two groups of hex pairs plus an ASCII column."""

    bytes_per_line = 16

    def __call__(self, data: Sequence[int]) -> str:
        """Return the hex and ASCII columns for one line of at most 16 bytes.

        Bytes outside the printable ASCII range (32-126) are shown as a dot
        in the ASCII column.
        """
        assert len(data) <= 16
        first_half = bytes(data[:8]).hex(" ")
        second_half = bytes(data[8:]).hex(" ")
        # The canonical format has an extra space between the 8th and 9th
        # byte, so the halves are joined with two spaces. A full line is then
        # 23 + 2 + 23 = 48 characters, the width short lines are padded to.
        hex_column = f"{first_half}  {second_half}".ljust(48)
        ascii_ = "".join(chr(b) if b > 31 and b < 127 else "." for b in data)
        return f"{hex_column} |{ascii_}|"
class BinaryFormatter(Formatter):
    """Format up to 6 bytes as 8-digit binary numbers plus an ASCII column."""

    bytes_per_line = 6

    def __call__(self, data: Sequence[int]) -> str:
        """Return the bit and ASCII columns for one line of at most 6 bytes."""
        assert len(data) <= 6

        octets = [bin(value)[2:].zfill(8) for value in data]
        # 6 * 8 digits + 5 separators = 53: pad short lines to full width.
        bits = " ".join(octets).ljust(53)

        printable = []
        for value in data:
            printable.append(chr(value) if 31 < value < 127 else ".")
        ascii_ = "".join(printable)

        return f"{bits} |{ascii_}|"
# Shared formatter instances: canonicalFormatter is hex_dump()'s default
# `format`, binaryFormatter is selected by the command line's --binary flag.
canonicalFormatter = CanonicalFormatter()
binaryFormatter = BinaryFormatter()
T = TypeVar("T")


def group(it: Iterable[T], n: int) -> Iterator[Tuple[T, ...]]:
    """Yield consecutive tuples of up to _n_ items taken from _it_.

    Every tuple has exactly _n_ items except possibly the last one, which
    holds whatever remained. Nothing is yielded for an empty iterable.
    """
    source = iter(it)
    # An empty tuple means the source is exhausted.
    while batch := tuple(islice(source, n)):
        yield batch
def hex_dump(
    f: BufferedIOBase,
    *,
    skip: int = 0,
    length: int = math.inf,  # type: ignore
    format: Formatter = canonicalFormatter,
) -> Iterator[str]:
    """Generate a textual representation of bytes in _f_, one line at a time.

    _skip_ is the position seeked to before reading; _length_ is the maximum
    number of bytes to dump. _format_ turns each group of
    ``format.bytes_per_line`` bytes into the text of a line.

    Each yielded line is prefixed with its offset, counted from the first
    dumped byte. A run of lines identical to the previous one is replaced by
    a single ``*``. The last item yielded is the total number of bytes dumped.
    """
    f.seek(skip)

    width = format.bytes_per_line
    # Request a whole number of lines per read. Otherwise the tail of each
    # read becomes a short line in the middle of the dump whenever READ_SIZE
    # is not a multiple of the line width (e.g. 2048 bytes with 6-byte lines).
    read_size = max(READ_SIZE - READ_SIZE % width, width)

    byte_count = 0
    previous_line = ""
    identical_chunk = False

    while byte_count < length:
        data = f.read(read_size)

        # Stop if we've run out of data.
        if not data:
            break

        # Discard excess bytes if we've overshot length.
        if byte_count + len(data) > length:
            data = data[: length - byte_count]

        # One line per chunk
        for chunk in group(data, width):
            line = format(chunk)
            if previous_line == line:
                # Only the first repeat of a run emits the "*" marker.
                if not identical_chunk:
                    identical_chunk = True
                    yield "*"
            else:
                previous_line = line
                identical_chunk = False
                yield f"{byte_count:0>8x} {line}"

            # Advance by what the chunk really holds, so a short chunk can
            # never push later offsets past the true position.
            byte_count += len(chunk)

    # Final byte count
    yield f"{byte_count:0>8x}"
if __name__ == "__main__":
    # Command line interface: hex_dump.py FILE [-b] [-s SKIP] [-n LENGTH]
    import argparse

    parser = argparse.ArgumentParser(
        prog="hex_dump.py",
        description="Display bytes in a file.",
    )

    parser.add_argument(
        "file",
        type=argparse.FileType(mode="rb"),
        metavar="FILE",
        help="target file to dump",
    )

    parser.add_argument(
        "-b",
        "--binary",
        action="store_true",
        help="display bytes in binary instead of hex",
    )

    parser.add_argument(
        "-s",
        "--skip",
        type=int,
        default=0,
        help="skip SKIP bytes from the beginning",
    )

    parser.add_argument(
        "-n",
        "--length",
        type=int,
        default=math.inf,
        help="read up to LENGTH bytes",
    )

    args = parser.parse_args()
    formatter = binaryFormatter if args.binary else canonicalFormatter

    # argparse.FileType opened the file while parsing; make sure it is closed
    # again once the dump has been printed (or if printing fails).
    with args.file:
        for line in hex_dump(
            args.file,
            format=formatter,
            skip=args.skip,
            length=args.length,
        ):
            print(line)
- Output:
$ python hex_dump.py example_utf16.txt
00000000  ff fe 52 00 6f 00 73 00  65 00 74 00 74 00 61 00  |..R.o.s.e.t.t.a.|
00000010  20 00 43 00 6f 00 64 00  65 00 20 00 69 00 73 00  | .C.o.d.e. .i.s.|
00000020  20 00 61 00 20 00 70 00  72 00 6f 00 67 00 72 00  | .a. .p.r.o.g.r.|
00000030  61 00 6d 00 6d 00 69 00  6e 00 67 00 20 00 63 00  |a.m.m.i.n.g. .c.|
00000040  68 00 72 00 65 00 73 00  74 00 6f 00 6d 00 61 00  |h.r.e.s.t.o.m.a.|
00000050  74 00 68 00 79 00 20 00  73 00 69 00 74 00 65 00  |t.h.y. .s.i.t.e.|
00000060  20 00 3d d8 00 de 2e 00                           | .=.....|
00000068
$ python hex_dump.py example_utf16.txt -b
00000000 11111111 11111110 01010010 00000000 01101111 00000000 |..R.o.|
00000006 01110011 00000000 01100101 00000000 01110100 00000000 |s.e.t.|
0000000c 01110100 00000000 01100001 00000000 00100000 00000000 |t.a. .|
00000012 01000011 00000000 01101111 00000000 01100100 00000000 |C.o.d.|
00000018 01100101 00000000 00100000 00000000 01101001 00000000 |e. .i.|
0000001e 01110011 00000000 00100000 00000000 01100001 00000000 |s. .a.|
00000024 00100000 00000000 01110000 00000000 01110010 00000000 | .p.r.|
0000002a 01101111 00000000 01100111 00000000 01110010 00000000 |o.g.r.|
00000030 01100001 00000000 01101101 00000000 01101101 00000000 |a.m.m.|
00000036 01101001 00000000 01101110 00000000 01100111 00000000 |i.n.g.|
0000003c 00100000 00000000 01100011 00000000 01101000 00000000 | .c.h.|
00000042 01110010 00000000 01100101 00000000 01110011 00000000 |r.e.s.|
00000048 01110100 00000000 01101111 00000000 01101101 00000000 |t.o.m.|
0000004e 01100001 00000000 01110100 00000000 01101000 00000000 |a.t.h.|
00000054 01111001 00000000 00100000 00000000 01110011 00000000 |y. .s.|
0000005a 01101001 00000000 01110100 00000000 01100101 00000000 |i.t.e.|
00000060 00100000 00000000 00111101 11011000 00000000 11011110 | .=...|
00000066 00101110 00000000                                     |..|
00000068