Compiler/code generator: Difference between revisions

(J)
Line 2,516:
GENERATE EMIT BYE</lang>
Passes all tests.
 
=={{header|Fortran}}==
{{works with|gfortran|11.2.1}}
Fortran 2008/2018 code with C preprocessing. On case-sensitive systems, if you call the source file gen.F90, with a capital F, then gfortran will know to use the C preprocessor.
 
<lang fortran>module compiler_type_kinds
use, intrinsic :: iso_fortran_env, only: int32
use, intrinsic :: iso_fortran_env, only: int64
 
implicit none
private
 
! Synonyms.
integer, parameter, public :: size_kind = int64
integer, parameter, public :: length_kind = size_kind
integer, parameter, public :: nk = size_kind
 
! Synonyms for character capable of storing a Unicode code point.
integer, parameter, public :: unicode_char_kind = selected_char_kind ('ISO_10646')
integer, parameter, public :: ck = unicode_char_kind
 
! Synonyms for integers capable of storing a Unicode code point.
integer, parameter, public :: unicode_ichar_kind = int32
integer, parameter, public :: ick = unicode_ichar_kind
 
! Synonyms for integers in the virtual machine or the interpreter’s
! runtime. (The Rosetta Code task says integers in the virtual
! machine are 32-bit, but there is nothing in the task that prevents
! us using 64-bit integers in the compiler and interpreter.)
integer, parameter, public :: runtime_int_kind = int64
integer, parameter, public :: rik = runtime_int_kind
end module compiler_type_kinds
 
module helper_procedures
use, non_intrinsic :: compiler_type_kinds, only: nk, rik, ck
 
implicit none
private
 
public :: new_storage_size
public :: next_power_of_two
 
public :: isspace
public :: quoted_string
 
public :: int32_to_vm_bytes
public :: uint32_to_vm_bytes
public :: int32_from_vm_bytes
public :: uint32_from_vm_bytes
 
character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck)
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck)
character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck)
character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck)
character(1, kind = ck), parameter :: space_char = ck_' '
 
! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char
 
character(1, kind = ck), parameter :: backslash_char = char (92, kind = ck)
 
contains
 
elemental function new_storage_size (length_needed) result (size)
integer(kind = nk), intent(in) :: length_needed
integer(kind = nk) :: size
 
! Increase storage by orders of magnitude.
 
if (2_nk**32 < length_needed) then
size = huge (1_nk)
else
size = next_power_of_two (length_needed)
end if
end function new_storage_size
 
elemental function next_power_of_two (x) result (y)
integer(kind = nk), intent(in) :: x
integer(kind = nk) :: y
 
!
! It is assumed that no more than 64 bits are used.
!
! The branch-free algorithm is that of
! https://archive.is/nKxAc#RoundUpPowerOf2
!
! Fill in bits until one less than the desired power of two is
! reached, and then add one.
!
 
y = x - 1
y = ior (y, ishft (y, -1))
y = ior (y, ishft (y, -2))
y = ior (y, ishft (y, -4))
y = ior (y, ishft (y, -8))
y = ior (y, ishft (y, -16))
y = ior (y, ishft (y, -32))
y = y + 1
end function next_power_of_two
 
elemental function isspace (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool
 
bool = (ch == horizontal_tab_char) .or. &
& (ch == linefeed_char) .or. &
& (ch == vertical_tab_char) .or. &
& (ch == formfeed_char) .or. &
& (ch == carriage_return_char) .or. &
& (ch == space_char)
end function isspace
 
function quoted_string (str) result (qstr)
character(*, kind = ck), intent(in) :: str
character(:, kind = ck), allocatable :: qstr
 
integer(kind = nk) :: n, i, j
 
! Compute n = the size of qstr.
n = 2_nk
do i = 1_nk, len (str, kind = nk)
select case (str(i:i))
case (newline_char, backslash_char)
n = n + 2
case default
n = n + 1
end select
end do
 
allocate (character(n, kind = ck) :: qstr)
 
! Quote the string.
qstr(1:1) = ck_'"'
j = 2_nk
do i = 1_nk, len (str, kind = nk)
select case (str(i:i))
case (newline_char)
qstr(j:j) = backslash_char
qstr((j + 1):(j + 1)) = ck_'n'
j = j + 2
case (backslash_char)
qstr(j:j) = backslash_char
qstr((j + 1):(j + 1)) = backslash_char
j = j + 2
case default
qstr(j:j) = str(i:i)
j = j + 1
end select
end do
if (j /= n) error stop ! Check code correctness.
qstr(n:n) = ck_'"'
end function quoted_string
 
subroutine int32_to_vm_bytes (n, bytes, i)
integer(kind = rik), intent(in) :: n
character(1), intent(inout) :: bytes(0:*)
integer(kind = rik), intent(in) :: i
 
!
! The virtual machine is presumed to be little-endian. Because I
! slightly prefer little-endian.
!
 
bytes(i) = achar (ibits (n, 0, 8))
bytes(i + 1) = achar (ibits (n, 8, 8))
bytes(i + 2) = achar (ibits (n, 16, 8))
bytes(i + 3) = achar (ibits (n, 24, 8))
end subroutine int32_to_vm_bytes
 
subroutine uint32_to_vm_bytes (n, bytes, i)
integer(kind = rik), intent(in) :: n
character(1), intent(inout) :: bytes(0:*)
integer(kind = rik), intent(in) :: i
 
call int32_to_vm_bytes (n, bytes, i)
end subroutine uint32_to_vm_bytes
 
subroutine int32_from_vm_bytes (n, bytes, i)
integer(kind = rik), intent(out) :: n
character(1), intent(in) :: bytes(0:*)
integer(kind = rik), intent(in) :: i
 
!
! The virtual machine is presumed to be little-endian. Because I
! slightly prefer little-endian.
!
 
call uint32_from_vm_bytes (n, bytes, i)
if (ibits (n, 31, 1) == 1) then
! Extend the sign bit.
n = ior (n, not ((2_rik ** 32) - 1))
end if
end subroutine int32_from_vm_bytes
 
subroutine uint32_from_vm_bytes (n, bytes, i)
integer(kind = rik), intent(out) :: n
character(1), intent(in) :: bytes(0:*)
integer(kind = rik), intent(in) :: i
 
!
! The virtual machine is presumed to be little-endian. Because I
! slightly prefer little-endian.
!
 
integer(kind = rik) :: n0, n1, n2, n3
 
n0 = iachar (bytes(i), kind = rik)
n1 = ishft (iachar (bytes(i + 1), kind = rik), 8)
n2 = ishft (iachar (bytes(i + 2), kind = rik), 16)
n3 = ishft (iachar (bytes(i + 3), kind = rik), 24)
n = ior (n0, ior (n1, ior (n2, n3)))
end subroutine uint32_from_vm_bytes
 
end module helper_procedures
 
module string_buffers
use, intrinsic :: iso_fortran_env, only: error_unit
use, intrinsic :: iso_fortran_env, only: int64
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: helper_procedures
 
implicit none
private
 
public :: strbuf_t
public :: skip_whitespace
public :: skip_non_whitespace
public :: skip_whitespace_backwards
public :: at_end_of_line
 
type :: strbuf_t
integer(kind = nk), private :: len = 0
!
! ‘chars’ is made public for efficient access to the individual
! characters.
!
character(1, kind = ck), allocatable, public :: chars(:)
contains
procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage
procedure, pass :: to_unicode_full_string => strbuf_t_to_unicode_full_string
procedure, pass :: to_unicode_substring => strbuf_t_to_unicode_substring
procedure, pass :: length => strbuf_t_length
procedure, pass :: set => strbuf_t_set
procedure, pass :: append => strbuf_t_append
generic :: to_unicode => to_unicode_full_string
generic :: to_unicode => to_unicode_substring
generic :: assignment(=) => set
end type strbuf_t
 
contains
 
function strbuf_t_to_unicode_full_string (strbuf) result (s)
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable :: s
 
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit ‘character’ is supported.
!
 
integer(kind = nk) :: i
 
allocate (character(len = strbuf%len, kind = ck) :: s)
do i = 1, strbuf%len
s(i:i) = strbuf%chars(i)
end do
end function strbuf_t_to_unicode_full_string
 
function strbuf_t_to_unicode_substring (strbuf, i, j) result (s)
!
! ‘Extreme’ values of i and j are allowed, as shortcuts for ‘from
! the beginning’, ‘up to the end’, or ‘empty substring’.
!
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
character(:, kind = ck), allocatable :: s
 
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit ‘character’ is supported.
!
 
integer(kind = nk) :: i1, j1
integer(kind = nk) :: n
integer(kind = nk) :: k
 
i1 = max (1_nk, i)
j1 = min (strbuf%len, j)
n = max (0_nk, (j1 - i1) + 1_nk)
 
allocate (character(n, kind = ck) :: s)
do k = 1, n
s(k:k) = strbuf%chars(i1 + (k - 1_nk))
end do
end function strbuf_t_to_unicode_substring
 
elemental function strbuf_t_length (strbuf) result (n)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk) :: n
 
n = strbuf%len
end function strbuf_t_length
 
subroutine strbuf_t_ensure_storage (strbuf, length_needed)
class(strbuf_t), intent(inout) :: strbuf
integer(kind = nk), intent(in) :: length_needed
 
integer(kind = nk) :: len_needed
integer(kind = nk) :: new_size
type(strbuf_t) :: new_strbuf
 
len_needed = max (length_needed, 1_nk)
 
if (.not. allocated (strbuf%chars)) then
! Initialize a new strbuf%chars array.
new_size = new_storage_size (len_needed)
allocate (strbuf%chars(1:new_size))
else if (ubound (strbuf%chars, 1) < len_needed) then
! Allocate a new strbuf%chars array, larger than the current
! one, but containing the same characters.
new_size = new_storage_size (len_needed)
allocate (new_strbuf%chars(1:new_size))
new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len)
call move_alloc (new_strbuf%chars, strbuf%chars)
end if
end subroutine strbuf_t_ensure_storage
 
subroutine strbuf_t_set (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
 
integer(kind = nk) :: n
integer(kind = nk) :: i
 
select type (src)
type is (character(*, kind = ck))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
type is (character(*))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n = src%len
call dst%ensure_storage(n)
dst%chars(1:n) = src%chars(1:n)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_set
 
subroutine strbuf_t_append (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
 
integer(kind = nk) :: n_dst, n_src, n
integer(kind = nk) :: i
 
select type (src)
type is (character(*, kind = ck))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
type is (character(*))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n_dst = dst%len
n_src = src%len
n = n_dst + n_src
call dst%ensure_storage(n)
dst%chars((n_dst + 1):n) = src%chars(1:n_src)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_append
 
function skip_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_whitespace
 
function skip_non_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_non_whitespace
 
function skip_whitespace_backwards (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
 
logical :: done
 
j = i
done = .false.
do while (.not. done)
if (j == -1) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j - 1
end if
end do
end function skip_whitespace_backwards
 
function at_end_of_line (strbuf, i) result (bool)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
logical :: bool
 
bool = (strbuf%length() < i)
end function at_end_of_line
 
end module string_buffers
 
module reading_one_line_from_a_stream
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
 
implicit none
private
 
! get_line_from_stream: read an entire input line from a stream into
! a strbuf_t.
public :: get_line_from_stream
 
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
 
! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char
 
contains
 
subroutine get_line_from_stream (unit_no, eof, no_newline, strbuf)
integer, intent(in) :: unit_no
logical, intent(out) :: eof ! End of file?
logical, intent(out) :: no_newline ! There is a line but it has no
! newline? (Thus eof also must
! be .true.)
class(strbuf_t), intent(inout) :: strbuf
 
character(1, kind = ck) :: ch
 
strbuf = ''
call get_ch (unit_no, eof, ch)
do while (.not. eof .and. ch /= newline_char)
call strbuf%append (ch)
call get_ch (unit_no, eof, ch)
end do
no_newline = eof .and. (strbuf%length() /= 0)
end subroutine get_line_from_stream
 
subroutine get_ch (unit_no, eof, ch)
!
! Read a single code point from the stream.
!
! Currently this procedure simply inputs ‘ASCII’ bytes rather than
! Unicode code points.
!
integer, intent(in) :: unit_no
logical, intent(out) :: eof
character(1, kind = ck), intent(out) :: ch
 
integer :: stat
character(1) :: c = '*'
 
eof = .false.
 
if (unit_no == input_unit) then
call get_input_unit_char (c, stat)
else
read (unit = unit_no, iostat = stat) c
end if
 
if (stat < 0) then
ch = ck_'*'
eof = .true.
else if (0 < stat) then
write (error_unit, '("Input error with status code ", I0)') stat
stop 1
else
ch = char (ichar (c, kind = ick), kind = ck)
end if
end subroutine get_ch
 
!!!
!!! If you tell gfortran you want -std=f2008 or -std=f2018, you likely
!!! will need to add also -fall-intrinsics or -U__GFORTRAN__
!!!
!!! The first way, you get the FGETC intrinsic. The latter way, you
!!! get the C interface code that uses getchar(3).
!!!
#ifdef __GFORTRAN__
 
subroutine get_input_unit_char (c, stat)
!
! The following works if you are using gfortran.
!
! (FGETC is considered a feature for backwards compatibility with
! g77. However, I know of no way to reconfigure input_unit as a
! Fortran 2003 stream, for use with ordinary ‘read’.)
!
character, intent(inout) :: c
integer, intent(out) :: stat
 
call fgetc (input_unit, c, stat)
end subroutine get_input_unit_char
 
#else
 
subroutine get_input_unit_char (c, stat)
!
! An alternative implementation of get_input_unit_char. This
! actually reads input from the C standard input, which might not
! be the same as input_unit.
!
use, intrinsic :: iso_c_binding, only: c_int
character, intent(inout) :: c
integer, intent(out) :: stat
 
interface
!
! Use getchar(3) to read characters from standard input. This
! assumes there is actually such a function available, and that
! getchar(3) does not exist solely as a macro. (One could write
! one’s own getchar() if necessary, of course.)
!
function getchar () result (c) bind (c, name = 'getchar')
use, intrinsic :: iso_c_binding, only: c_int
integer(kind = c_int) :: c
end function getchar
end interface
 
integer(kind = c_int) :: i_char
 
i_char = getchar ()
!
! The C standard requires that EOF have a negative value. If the
! value returned by getchar(3) is not EOF, then it will be
! representable as an unsigned char. Therefore, to check for end
! of file, one need only test whether i_char is negative.
!
if (i_char < 0) then
stat = -1
else
stat = 0
c = char (i_char)
end if
end subroutine get_input_unit_char
 
#endif
 
end module reading_one_line_from_a_stream
 
module ast_reader
 
!
! The AST will be read into an array. Perhaps that will improve
! locality, compared to storing the AST as many linked heap nodes.
!
! In any case, implementing the AST this way is an interesting
! problem.
!
 
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick, rik
use, non_intrinsic :: helper_procedures, only: next_power_of_two
use, non_intrinsic :: helper_procedures, only: new_storage_size
use, non_intrinsic :: string_buffers
use, non_intrinsic :: reading_one_line_from_a_stream
 
implicit none
private
 
public :: string_table_t
public :: ast_node_t
public :: ast_t
public :: read_ast
 
integer, parameter, public :: node_Nil = 0
integer, parameter, public :: node_Identifier = 1
integer, parameter, public :: node_String = 2
integer, parameter, public :: node_Integer = 3
integer, parameter, public :: node_Sequence = 4
integer, parameter, public :: node_If = 5
integer, parameter, public :: node_Prtc = 6
integer, parameter, public :: node_Prts = 7
integer, parameter, public :: node_Prti = 8
integer, parameter, public :: node_While = 9
integer, parameter, public :: node_Assign = 10
integer, parameter, public :: node_Negate = 11
integer, parameter, public :: node_Not = 12
integer, parameter, public :: node_Multiply = 13
integer, parameter, public :: node_Divide = 14
integer, parameter, public :: node_Mod = 15
integer, parameter, public :: node_Add = 16
integer, parameter, public :: node_Subtract = 17
integer, parameter, public :: node_Less = 18
integer, parameter, public :: node_LessEqual = 19
integer, parameter, public :: node_Greater = 20
integer, parameter, public :: node_GreaterEqual = 21
integer, parameter, public :: node_Equal = 22
integer, parameter, public :: node_NotEqual = 23
integer, parameter, public :: node_And = 24
integer, parameter, public :: node_Or = 25
 
type :: string_table_element_t
character(:, kind = ck), allocatable :: str
end type string_table_element_t
 
type :: string_table_t
integer(kind = nk), private :: len = 0_nk
type(string_table_element_t), allocatable, private :: strings(:)
contains
procedure, pass, private :: ensure_storage => string_table_t_ensure_storage
procedure, pass :: look_up_index => string_table_t_look_up_index
procedure, pass :: look_up_string => string_table_t_look_up_string
procedure, pass :: length => string_table_t_length
generic :: look_up => look_up_index
generic :: look_up => look_up_string
end type string_table_t
 
type :: ast_node_t
integer :: node_variety
 
! Runtime integer, symbol index, or string index.
integer(kind = rik) :: int
 
! The left branch begins at the next node. The right branch
! begins at the address of the left branch, plus the following.
integer(kind = nk) :: right_branch_offset
end type ast_node_t
 
type :: ast_t
integer(kind = nk), private :: len = 0_nk
type(ast_node_t), allocatable, public :: nodes(:)
contains
procedure, pass, private :: ensure_storage => ast_t_ensure_storage
end type ast_t
 
contains
 
subroutine string_table_t_ensure_storage (table, length_needed)
class(string_table_t), intent(inout) :: table
integer(kind = nk), intent(in) :: length_needed
 
integer(kind = nk) :: len_needed
integer(kind = nk) :: new_size
type(string_table_t) :: new_table
 
len_needed = max (length_needed, 1_nk)
 
if (.not. allocated (table%strings)) then
! Initialize a new table%strings array.
new_size = new_storage_size (len_needed)
allocate (table%strings(1:new_size))
else if (ubound (table%strings, 1) < len_needed) then
! Allocate a new table%strings array, larger than the current
! one, but containing the same strings.
new_size = new_storage_size (len_needed)
allocate (new_table%strings(1:new_size))
new_table%strings(1:table%len) = table%strings(1:table%len)
call move_alloc (new_table%strings, table%strings)
end if
end subroutine string_table_t_ensure_storage
 
elemental function string_table_t_length (table) result (len)
class(string_table_t), intent(in) :: table
integer(kind = nk) :: len
 
len = table%len
end function string_table_t_length
 
function string_table_t_look_up_index (table, str) result (index)
class(string_table_t), intent(inout) :: table
character(*, kind = ck), intent(in) :: str
integer(kind = rik) :: index
 
!
! This implementation simply stores the strings sequentially into
! an array. Obviously, for large numbers of strings, one might
! wish to do something more complex.
!
! Standard Fortran does not come, out of the box, with a massive
! runtime library for doing such things. They are, however, no
! longer nearly as challenging to implement in Fortran as they
! used to be.
!
 
integer(kind = nk) :: i
 
i = 1
index = 0
do while (index == 0)
if (i == table%len + 1) then
! The string is new and must be added to the table.
i = table%len + 1
if (huge (1_rik) < i) then
! String indices are assumed to be storable as runtime
! integers.
write (error_unit, '("string_table_t capacity exceeded")')
stop 1
end if
call table%ensure_storage(i)
table%len = i
allocate (table%strings(i)%str, source = str)
index = int (i, kind = rik)
else if (table%strings(i)%str == str) then
index = int (i, kind = rik)
else
i = i + 1
end if
end do
end function string_table_t_look_up_index
 
function string_table_t_look_up_string (table, index) result (str)
class(string_table_t), intent(inout) :: table
integer(kind = rik), intent(in) :: index
character(:, kind = ck), allocatable :: str
 
!
! This is the reverse of string_table_t_look_up_index: given an
! index, find the string.
!
 
if (index < 1 .or. table%len < index) then
! In correct code, this branch should never be reached.
error stop
else
allocate (str, source = table%strings(index)%str)
end if
end function string_table_t_look_up_string
 
subroutine ast_t_ensure_storage (ast, length_needed)
class(ast_t), intent(inout) :: ast
integer(kind = nk), intent(in) :: length_needed
 
integer(kind = nk) :: len_needed
integer(kind = nk) :: new_size
type(ast_t) :: new_ast
 
len_needed = max (length_needed, 1_nk)
 
if (.not. allocated (ast%nodes)) then
! Initialize a new ast%nodes array.
new_size = new_storage_size (len_needed)
allocate (ast%nodes(1:new_size))
else if (ubound (ast%nodes, 1) < len_needed) then
! Allocate a new ast%nodes array, larger than the current one,
! but containing the same nodes.
new_size = new_storage_size (len_needed)
allocate (new_ast%nodes(1:new_size))
new_ast%nodes(1:ast%len) = ast%nodes(1:ast%len)
call move_alloc (new_ast%nodes, ast%nodes)
end if
end subroutine ast_t_ensure_storage
 
subroutine read_ast (unit_no, strbuf, ast, symtab, strtab)
integer, intent(in) :: unit_no
type(strbuf_t), intent(inout) :: strbuf
type(ast_t), intent(inout) :: ast
type(string_table_t), intent(inout) :: symtab
type(string_table_t), intent(inout) :: strtab
 
logical :: eof
logical :: no_newline
integer(kind = nk) :: after_ast_address
ast%len = 0
symtab%len = 0
strtab%len = 0
call build_subtree (1_nk, after_ast_address)
 
contains
 
recursive subroutine build_subtree (here_address, after_subtree_address)
integer(kind = nk), value :: here_address
integer(kind = nk), intent(out) :: after_subtree_address
 
integer :: node_variety
integer(kind = nk) :: i, j
integer(kind = nk) :: left_branch_address
integer(kind = nk) :: right_branch_address
 
! Get a line from the parser output.
call get_line_from_stream (unit_no, eof, no_newline, strbuf)
 
if (eof) then
call ast_error
else
! Prepare to store a new node.
call ast%ensure_storage(here_address)
ast%len = here_address
 
! What sort of node is it?
i = skip_whitespace (strbuf, 1_nk)
j = skip_non_whitespace (strbuf, i)
node_variety = strbuf_to_node_variety (strbuf, i, j - 1)
 
ast%nodes(here_address)%node_variety = node_variety
 
select case (node_variety)
case (node_Nil)
after_subtree_address = here_address + 1
case (node_Identifier)
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
ast%nodes(here_address)%int = &
& strbuf_to_symbol_index (strbuf, i, j - 1, symtab)
after_subtree_address = here_address + 1
case (node_String)
i = skip_whitespace (strbuf, j)
j = skip_whitespace_backwards (strbuf, strbuf%length())
ast%nodes(here_address)%int = &
& strbuf_to_string_index (strbuf, i, j, strtab)
after_subtree_address = here_address + 1
case (node_Integer)
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
ast%nodes(here_address)%int = strbuf_to_int (strbuf, i, j - 1)
after_subtree_address = here_address + 1
case default
! The node is internal, and has left and right branches.
! The left branch will start at left_branch_address; the
! right branch will start at left_branch_address +
! right_side_offset.
left_branch_address = here_address + 1
! Build the left branch.
call build_subtree (left_branch_address, right_branch_address)
! Build the right_branch.
call build_subtree (right_branch_address, after_subtree_address)
ast%nodes(here_address)%right_branch_offset = &
& right_branch_address - left_branch_address
end select
 
end if
end subroutine build_subtree
end subroutine read_ast
 
function strbuf_to_node_variety (strbuf, i, j) result (node_variety)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
integer :: node_variety
 
!
! This function has not been optimized in any way, unless the
! Fortran compiler can optimize it.
!
! Something like a ‘radix tree search’ could be done on the
! characters of the strbuf. Or a perfect hash function. Or a
! binary search. Etc.
!
 
if (j == i - 1) then
call ast_error
else
select case (strbuf%to_unicode(i, j))
case (ck_";")
node_variety = node_Nil
case (ck_"Identifier")
node_variety = node_Identifier
case (ck_"String")
node_variety = node_String
case (ck_"Integer")
node_variety = node_Integer
case (ck_"Sequence")
node_variety = node_Sequence
case (ck_"If")
node_variety = node_If
case (ck_"Prtc")
node_variety = node_Prtc
case (ck_"Prts")
node_variety = node_Prts
case (ck_"Prti")
node_variety = node_Prti
case (ck_"While")
node_variety = node_While
case (ck_"Assign")
node_variety = node_Assign
case (ck_"Negate")
node_variety = node_Negate
case (ck_"Not")
node_variety = node_Not
case (ck_"Multiply")
node_variety = node_Multiply
case (ck_"Divide")
node_variety = node_Divide
case (ck_"Mod")
node_variety = node_Mod
case (ck_"Add")
node_variety = node_Add
case (ck_"Subtract")
node_variety = node_Subtract
case (ck_"Less")
node_variety = node_Less
case (ck_"LessEqual")
node_variety = node_LessEqual
case (ck_"Greater")
node_variety = node_Greater
case (ck_"GreaterEqual")
node_variety = node_GreaterEqual
case (ck_"Equal")
node_variety = node_Equal
case (ck_"NotEqual")
node_variety = node_NotEqual
case (ck_"And")
node_variety = node_And
case (ck_"Or")
node_variety = node_Or
case default
call ast_error
end select
end if
end function strbuf_to_node_variety
 
function strbuf_to_symbol_index (strbuf, i, j, symtab) result (int)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
type(string_table_t), intent(inout) :: symtab
integer(kind = rik) :: int
 
if (j == i - 1) then
call ast_error
else
int = symtab%look_up(strbuf%to_unicode (i, j))
end if
end function strbuf_to_symbol_index
 
function strbuf_to_int (strbuf, i, j) result (int)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
integer(kind = rik) :: int
 
integer :: stat
character(:, kind = ck), allocatable :: str
 
if (j < i) then
call ast_error
else
allocate (character(len = (j - i) + 1_nk, kind = ck) :: str)
str = strbuf%to_unicode (i, j)
read (str, *, iostat = stat) int
if (stat /= 0) then
call ast_error
end if
end if
end function strbuf_to_int
 
function strbuf_to_string_index (strbuf, i, j, strtab) result (int)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
type(string_table_t), intent(inout) :: strtab
integer(kind = rik) :: int
 
if (j == i - 1) then
call ast_error
else
int = strtab%look_up(strbuf_to_string (strbuf, i, j))
end if
end function strbuf_to_string_index
 
function strbuf_to_string (strbuf, i, j) result (str)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
character(:, kind = ck), allocatable :: str
 
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
character(1, kind = ck), parameter :: backslash_char = char (92, kind = ck)
 
! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char
 
integer(kind = nk) :: k
integer(kind = nk) :: count
 
if (strbuf%chars(i) /= ck_'"' .or. strbuf%chars(j) /= ck_'"') then
call ast_error
else
! Count how many characters are needed.
count = 0
k = i + 1
do while (k < j)
count = count + 1
if (strbuf%chars(k) == backslash_char) then
k = k + 2
else
k = k + 1
end if
end do
 
allocate (character(len = count, kind = ck) :: str)
 
count = 0
k = i + 1
do while (k < j)
if (strbuf%chars(k) == backslash_char) then
if (k == j - 1) then
call ast_error
else
select case (strbuf%chars(k + 1))
case (ck_'n')
count = count + 1
str(count:count) = newline_char
case (backslash_char)
count = count + 1
str(count:count) = backslash_char
case default
call ast_error
end select
k = k + 2
end if
else
count = count + 1
str(count:count) = strbuf%chars(k)
k = k + 1
end if
end do
end if
end function strbuf_to_string
 
subroutine ast_error
!
! It might be desirable to give more detail.
!
write (error_unit, '("The AST input seems corrupted.")')
stop 1
end subroutine ast_error
 
end module ast_reader
 
module code_generation
 
!
! First we generate code as if the virtual machine itself were part
! of this program. Then we disassemble the generated code.
!
! Because we are targeting only the one output language, this seems
! an easy way to perform the task.
!
!
! A point worth noting: the virtual machine is a stack
! architecture.
!
! Stack architectures have a long history. Burroughs famously
! preferred stack architectures for running Algol programs. See, for
! instance,
! https://en.wikipedia.org/w/index.php?title=Burroughs_large_systems&oldid=1068076420
!
 
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds
use, non_intrinsic :: helper_procedures
use, non_intrinsic :: ast_reader
 
implicit none
private
 
public :: generate_and_output_code
public :: generate_code
public :: output_code
 
! The virtual machine cannot handle integers of more than 32 bits,
! two’s-complement.
integer(kind = rik), parameter :: vm_huge_negint = -(2_rik ** 31_rik)
integer(kind = rik), parameter :: vm_huge_posint = (2_rik ** 31_rik) - 1_rik
 
! Arbitrarily chosen opcodes.
integer, parameter :: opcode_nop = 0 ! I think there should be a nop
! opcode, to reserve space for
! later hand-patching. :)
integer, parameter :: opcode_halt = 1 ! Does the ‘halt’ instruction
! apply brakes to the drum?
integer, parameter :: opcode_add = 2
integer, parameter :: opcode_sub = 3
integer, parameter :: opcode_mul = 4
integer, parameter :: opcode_div = 5
integer, parameter :: opcode_mod = 6
integer, parameter :: opcode_lt = 7
integer, parameter :: opcode_gt = 8
integer, parameter :: opcode_le = 9
integer, parameter :: opcode_ge = 10
integer, parameter :: opcode_eq = 11
integer, parameter :: opcode_ne = 12
integer, parameter :: opcode_and = 13
integer, parameter :: opcode_or = 14
integer, parameter :: opcode_neg = 15
integer, parameter :: opcode_not = 16
integer, parameter :: opcode_prtc = 17
integer, parameter :: opcode_prti = 18
integer, parameter :: opcode_prts = 19
integer, parameter :: opcode_fetch = 20
integer, parameter :: opcode_store = 21
integer, parameter :: opcode_push = 22
integer, parameter :: opcode_jmp = 23
integer, parameter :: opcode_jz = 24
 
character(8, kind = ck), parameter :: opcode_names(0:24) = &
& (/ "nop ", &
& "halt ", &
& "add ", &
& "sub ", &
& "mul ", &
& "div ", &
& "mod ", &
& "lt ", &
& "gt ", &
& "le ", &
& "ge ", &
& "eq ", &
& "ne ", &
& "and ", &
& "or ", &
& "neg ", &
& "not ", &
& "prtc ", &
& "prti ", &
& "prts ", &
& "fetch ", &
& "store ", &
& "push ", &
& "jmp ", &
& "jz " /)
 
type :: vm_code_t
integer(kind = rik), private :: len = 0_rik
character(1), allocatable :: bytes(:)
contains
procedure, pass, private :: ensure_storage => vm_code_t_ensure_storage
procedure, pass :: length => vm_code_t_length
end type vm_code_t
 
contains
 
subroutine vm_code_t_ensure_storage (code, length_needed)
class(vm_code_t), intent(inout) :: code
integer(kind = nk), intent(in) :: length_needed
 
integer(kind = nk) :: len_needed
integer(kind = nk) :: new_size
type(vm_code_t) :: new_code
 
len_needed = max (length_needed, 1_nk)
 
if (.not. allocated (code%bytes)) then
! Initialize a new code%bytes array.
new_size = new_storage_size (len_needed)
allocate (code%bytes(0:(new_size - 1)))
else if (ubound (code%bytes, 1) < len_needed - 1) then
! Allocate a new code%bytes array, larger than the current one,
! but containing the same bytes.
new_size = new_storage_size (len_needed)
allocate (new_code%bytes(0:(new_size - 1)))
new_code%bytes(0:(code%len - 1)) = code%bytes(0:(code%len - 1))
call move_alloc (new_code%bytes, code%bytes)
end if
end subroutine vm_code_t_ensure_storage
 
elemental function vm_code_t_length (code) result (len)
class(vm_code_t), intent(in) :: code
integer(kind = rik) :: len
 
len = code%len
end function vm_code_t_length
 
subroutine generate_and_output_code (outp, ast, symtab, strtab)
integer, intent(in) :: outp ! The unit to write the output to.
type(ast_t), intent(in) :: ast
type(string_table_t), intent(inout) :: symtab
type(string_table_t), intent(inout) :: strtab
 
type(vm_code_t) :: code
integer(kind = rik) :: i_vm
 
code%len = 0
i_vm = 0_rik
call generate_code (ast, 1_nk, i_vm, code)
call output_code (outp, symtab, strtab, code)
end subroutine generate_and_output_code
 
subroutine generate_code (ast, i_ast, i_vm, code)
type(ast_t), intent(in) :: ast
integer(kind = nk), intent(in) :: i_ast ! Index in the ast array.
integer(kind = rik), intent(inout) :: i_vm ! Address in the virtual machine.
type(vm_code_t), intent(inout) :: code
 
call traverse (i_ast)
 
! Generate a halt instruction.
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_halt)
i_vm = i_vm + 1
 
code%len = i_vm
 
contains
 
recursive subroutine traverse (i_ast)
integer(kind = nk), intent(in) :: i_ast ! Index in the ast array.
 
select case (ast%nodes(i_ast)%node_variety)
 
case (node_Nil)
continue
 
case (node_Integer)
block
integer(kind = rik) :: int_value
 
int_value = ast%nodes(i_ast)%int
call ensure_integer_is_vm_compatible (int_value)
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_push)
call int32_to_vm_bytes (int_value, code%bytes, i_vm + 1)
i_vm = i_vm + 5
end block
 
case (node_Identifier)
block
integer(kind = rik) :: variable_index
 
! In the best Fortran tradition, we indexed the variables
! starting at one; however, the virtual machine starts them
! at zero. So subtract 1.
variable_index = ast%nodes(i_ast)%int - 1
 
call ensure_integer_is_vm_compatible (variable_index)
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_fetch)
call uint32_to_vm_bytes (variable_index, code%bytes, i_vm + 1)
i_vm = i_vm + 5
end block
 
case (node_String)
block
integer(kind = rik) :: string_index
 
! In the best Fortran tradition, we indexed the strings
! starting at one; however, the virtual machine starts them
! at zero. So subtract 1.
string_index = ast%nodes(i_ast)%int - 1
 
call ensure_integer_is_vm_compatible (string_index)
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_push)
call uint32_to_vm_bytes (string_index, code%bytes, i_vm + 1)
i_vm = i_vm + 5
end block
 
case (node_Assign)
block
integer(kind = nk) :: i_left, i_right
integer(kind = rik) :: variable_index
 
i_left = left_branch (i_ast)
i_right = right_branch (i_ast)
 
! In the best Fortran tradition, we indexed the variables
! starting at one; however, the virtual machine starts them
! at zero. So subtract 1.
variable_index = ast%nodes(i_left)%int - 1
 
! Create code to push the right side onto the stack
call traverse (i_right)
 
! Create code to store that result into the variable on the
! left side.
call ensure_node_variety (node_Identifier, ast%nodes(i_left)%node_variety)
call ensure_integer_is_vm_compatible (variable_index)
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_store)
call uint32_to_vm_bytes (variable_index, code%bytes, i_vm + 1)
i_vm = i_vm + 5
end block
 
case (node_Multiply)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_mul)
i_vm = i_vm + 1
 
case (node_Divide)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_div)
i_vm = i_vm + 1
 
case (node_Mod)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_mod)
i_vm = i_vm + 1
 
case (node_Add)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_add)
i_vm = i_vm + 1
 
case (node_Subtract)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_sub)
i_vm = i_vm + 1
 
case (node_Less)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_lt)
i_vm = i_vm + 1
 
case (node_LessEqual)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_le)
i_vm = i_vm + 1
 
case (node_Greater)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_gt)
i_vm = i_vm + 1
 
case (node_GreaterEqual)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_ge)
i_vm = i_vm + 1
 
case (node_Equal)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_eq)
i_vm = i_vm + 1
 
case (node_NotEqual)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_ne)
i_vm = i_vm + 1
 
case (node_Negate)
call ensure_node_variety (node_Nil, &
& ast%nodes(right_branch (i_ast))%node_variety)
call traverse (left_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_neg)
i_vm = i_vm + 1
 
case (node_Not)
call ensure_node_variety (node_Nil, &
& ast%nodes(right_branch (i_ast))%node_variety)
call traverse (left_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_not)
i_vm = i_vm + 1
 
case (node_And)
!
! This is not a short-circuiting AND and so differs from
! C. One would not notice the difference, except in side
! effects that (I believe) are not possible in our tiny
! language.
!
! Even in a language such as Fortran that has actual AND and
! OR operators, an optimizer may generate short-circuiting
! code and so spoil one’s expectations for side
! effects. (Therefore gfortran may issue a warning if you
! call an unpure function within an .AND. or
! .OR. expression.)
!
! A C equivalent to what we have our code generator doing
! (and to Fortran’s .AND. operator) might be something like
!
! #define AND(a, b) ((!!(a)) * (!!(b)))
!
! This macro takes advantage of the equivalence of AND to
! multiplication modulo 2. The ‘!!’ notations are a C idiom
! for converting values to 0 and 1.
!
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_and)
i_vm = i_vm + 1
 
case (node_Or)
!
! This is not a short-circuiting OR and so differs from
! C. One would not notice the difference, except in side
! effects that (I believe) are not possible in our tiny
! language.
!
! Even in a language such as Fortran that has actual AND and
! OR operators, an optimizer may generate short-circuiting
! code and so spoil one’s expectations for side
! effects. (Therefore gfortran may issue a warning if you
! call an unpure function within an .AND. or
! .OR. expression.)
!
! A C equivalent to what we have our code generator doing
! (and to Fortran’s .OR. operator) might be something like
!
! #define OR(a, b) (!( (!(a)) * (!(b)) ))
!
! This macro takes advantage of the equivalence of AND to
! multiplication modulo 2, and the equivalence of OR(a,b) to
! !AND(!a,!b). One could instead take advantage of the
! equivalence of OR to addition modulo 2:
!
! #define OR(a, b) ( ( (!!(a)) + (!!(b)) ) & 1 )
!
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_or)
i_vm = i_vm + 1
 
case (node_If)
block
integer(kind = nk) :: i_left, i_right
integer(kind = nk) :: i_right_then_left, i_right_then_right
logical :: there_is_an_else_clause
integer(kind = rik) :: fixup_address1
integer(kind = rik) :: fixup_address2
integer(kind = rik) :: relative_address
 
i_left = left_branch (i_ast)
i_right = right_branch (i_ast)
 
call ensure_node_variety (node_If, ast%nodes(i_right)%node_variety)
 
i_right_then_left = left_branch (i_right)
i_right_then_right = right_branch (i_right)
 
there_is_an_else_clause = &
& (ast%nodes(i_right_then_right)%node_variety /= node_Nil)
 
! Generate code for the predicate.
call traverse (i_left)
 
! Generate a conditional jump over the predicate-true code.
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_jz)
call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1)
fixup_address1 = i_vm + 1
i_vm = i_vm + 5
 
! Generate the predicate-true code.
call traverse (i_right_then_left)
 
if (there_is_an_else_clause) then
! Generate an unconditional jump over the predicate-true
! code.
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_jmp)
call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1)
fixup_address2 = i_vm + 1
i_vm = i_vm + 5
 
! Fix up the conditional jump, so it jumps to the
! predicate-false code.
relative_address = i_vm - fixup_address1
call int32_to_vm_bytes (relative_address, code%bytes, fixup_address1)
 
! Generate the predicate-false code.
call traverse (i_right_then_right)
 
! Fix up the unconditional jump, so it jumps past the
! predicate-false code.
relative_address = i_vm - fixup_address2
call int32_to_vm_bytes (relative_address, code%bytes, fixup_address2)
else
! Fix up the conditional jump, so it jumps past the
! predicate-true code.
relative_address = i_vm - fixup_address1
call int32_to_vm_bytes (relative_address, code%bytes, fixup_address1)
end if
end block
 
case (node_While)
block
 
!
! Note there is another common way to translate a
! while-loop which is to put (logically inverted) predicate
! code *after* the loop-body code, followed by a
! conditional jump to the start of the loop. You start the
! loop by unconditionally jumping to the predicate code.
!
! If our VM had a ‘jnz’ instruction, that translation would
! almost certainly be slightly better than this one. Given
! that we do not have a ‘jnz’, the code would end up
! slightly enlarged; one would have to put ‘not’ before the
! ‘jz’ at the bottom of the loop.
!
 
integer(kind = nk) :: i_left, i_right
integer(kind = rik) :: loop_address
integer(kind = rik) :: fixup_address
integer(kind = rik) :: relative_address
 
i_left = left_branch (i_ast)
i_right = right_branch (i_ast)
 
! Generate code for the predicate.
loop_address = i_vm
call traverse (i_left)
 
! Generate a conditional jump out of the loop.
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_jz)
call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1)
fixup_address = i_vm + 1
i_vm = i_vm + 5
 
! Generate code for the loop body.
call traverse (i_right)
 
! Generate an unconditional jump to the top of the loop.
call code%ensure_storage(i_vm + 5)
code%bytes(i_vm) = achar (opcode_jmp)
relative_address = loop_address - (i_vm + 1)
call int32_to_vm_bytes (relative_address, code%bytes, i_vm + 1)
i_vm = i_vm + 5
 
! Fix up the conditional jump, so it jumps after the loop
! body.
relative_address = i_vm - fixup_address
call int32_to_vm_bytes (relative_address, code%bytes, fixup_address)
end block
 
case (node_Prtc)
call ensure_node_variety (node_Nil, &
& ast%nodes(right_branch (i_ast))%node_variety)
call traverse (left_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_prtc)
i_vm = i_vm + 1
 
case (node_Prti)
call ensure_node_variety (node_Nil, &
& ast%nodes(right_branch (i_ast))%node_variety)
call traverse (left_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_prti)
i_vm = i_vm + 1
 
case (node_Prts)
call ensure_node_variety (node_Nil, &
& ast%nodes(right_branch (i_ast))%node_variety)
call traverse (left_branch (i_ast))
call code%ensure_storage(i_vm + 1)
code%bytes(i_vm) = achar (opcode_prts)
i_vm = i_vm + 1
 
case (node_Sequence)
call traverse (left_branch (i_ast))
call traverse (right_branch (i_ast))
 
case default
call bad_ast
 
end select
 
code%len = i_vm
 
end subroutine traverse
 
elemental function left_branch (i_here) result (i_left)
integer(kind = nk), intent(in) :: i_here
integer(kind = nk) :: i_left
 
i_left = i_here + 1
end function left_branch
 
elemental function right_branch (i_here) result (i_right)
integer(kind = nk), intent(in) :: i_here
integer(kind = nk) :: i_right
 
i_right = i_here + 1 + ast%nodes(i_here)%right_branch_offset
end function right_branch
 
subroutine ensure_node_variety (expected_node_variety, found_node_variety)
integer, intent(in) :: expected_node_variety
integer, intent(in) :: found_node_variety
if (expected_node_variety /= found_node_variety) call bad_ast
end subroutine ensure_node_variety
 
subroutine bad_ast
call codegen_error_message
write (error_unit, '("unexpected abstract syntax")')
stop 1
end subroutine bad_ast
 
end subroutine generate_code
 
subroutine output_code (outp, symtab, strtab, code)
integer, intent(in) :: outp ! The unit to write the output to.
type(string_table_t), intent(inout) :: symtab
type(string_table_t), intent(inout) :: strtab
type(vm_code_t), intent(in) :: code
 
call write_header (outp, symtab%length(), strtab%length())
call write_strings (outp, strtab)
call disassemble_instructions (outp, code)
end subroutine output_code
 
subroutine write_header (outp, data_size, strings_size)
integer, intent(in) :: outp
integer(kind = rik) :: data_size
integer(kind = rik) :: strings_size
 
call ensure_integer_is_vm_compatible (data_size)
call ensure_integer_is_vm_compatible (strings_size)
write (outp, '("Datasize: ", I0, " Strings: ", I0)') data_size, strings_size
end subroutine write_header
 
subroutine write_strings (outp, strtab)
integer, intent(in) :: outp
type(string_table_t), intent(inout) :: strtab
 
integer(kind = rik) :: i
 
do i = 1_rik, strtab%length()
write (outp, '(1A)') quoted_string (strtab%look_up(i))
end do
end subroutine write_strings
 
subroutine disassemble_instructions (outp, code)
integer, intent(in) :: outp
type(vm_code_t), intent(in) :: code
 
integer(kind = rik) :: i_vm
integer :: opcode
integer(kind = rik) :: n
 
i_vm = 0_rik
do while (i_vm /= code%length())
call write_vm_code_address (outp, i_vm)
opcode = iachar (code%bytes(i_vm))
call write_vm_opcode (outp, opcode)
select case (opcode)
case (opcode_push)
call int32_from_vm_bytes (n, code%bytes, i_vm + 1)
call write_vm_int_literal (outp, n)
i_vm = i_vm + 5
case (opcode_fetch, opcode_store)
call uint32_from_vm_bytes (n, code%bytes, i_vm + 1)
call write_vm_data_address (outp, n)
i_vm = i_vm + 5
case (opcode_jmp, opcode_jz)
call int32_from_vm_bytes (n, code%bytes, i_vm + 1)
call write_vm_jump_address (outp, n, i_vm + 1)
i_vm = i_vm + 5
case default
i_vm = i_vm + 1
end select
write (outp, '()', advance = 'yes')
end do
end subroutine disassemble_instructions
 
subroutine write_vm_code_address (outp, i_vm)
integer, intent(in) :: outp
integer(kind = rik), intent(in) :: i_vm
 
! 10 characters is wide enough for any 32-bit unsigned number.
write (outp, '(I10, 1X)', advance = 'no') i_vm
end subroutine write_vm_code_address
 
subroutine write_vm_opcode (outp, opcode)
integer, intent(in) :: outp
integer, intent(in) :: opcode
 
character(8, kind = ck) :: opcode_name
 
opcode_name = opcode_names(opcode)
 
select case (opcode)
case (opcode_push, opcode_fetch, opcode_store, opcode_jz, opcode_jmp)
write (outp, '(1A)', advance = 'no') opcode_name(1:6)
case default
write (outp, '(1A)', advance = 'no') trim (opcode_name)
end select
end subroutine write_vm_opcode
 
subroutine write_vm_int_literal (outp, n)
integer, intent(in) :: outp
integer(kind = rik), intent(in) :: n
 
write (outp, '(I0)', advance = 'no') n
end subroutine write_vm_int_literal
 
subroutine write_vm_data_address (outp, i)
integer, intent(in) :: outp
integer(kind = rik), intent(in) :: i
 
write (outp, '("[", I0, "]")', advance = 'no') i
end subroutine write_vm_data_address
 
subroutine write_vm_jump_address (outp, relative_address, i_vm)
integer, intent(in) :: outp
integer(kind = rik), intent(in) :: relative_address
integer(kind = rik), intent(in) :: i_vm
 
write (outp, '(" (", I0, ") ", I0)', advance = 'no') &
& relative_address, i_vm + relative_address
end subroutine write_vm_jump_address
 
subroutine ensure_integer_is_vm_compatible (n)
integer(kind = rik), intent(in) :: n
!
! It would seem desirable to check this in the syntax analyzer,
! instead, so line and column numbers can be given. But checking
! here will not hurt.
!
if (n < vm_huge_negint .or. vm_huge_posint < n) then
call codegen_error_message
write (error_unit, '("integer is too large for the virtual machine: ", I0)') n
stop 1
end if
end subroutine ensure_integer_is_vm_compatible
 
subroutine codegen_error_message
write (error_unit, '("Code generation error: ")', advance = 'no')
end subroutine codegen_error_message
 
end module code_generation
 
program gen
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds
use, non_intrinsic :: string_buffers
use, non_intrinsic :: ast_reader
use, non_intrinsic :: code_generation
 
implicit none
 
integer, parameter :: inp_unit_no = 100
integer, parameter :: outp_unit_no = 101
 
integer :: arg_count
character(200) :: arg
integer :: inp
integer :: outp
 
type(strbuf_t) :: strbuf
type(ast_t) :: ast
type(string_table_t) :: symtab
type(string_table_t) :: strtab
 
arg_count = command_argument_count ()
if (3 <= arg_count) then
call print_usage
else
if (arg_count == 0) then
inp = input_unit
outp = output_unit
else if (arg_count == 1) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
outp = output_unit
else if (arg_count == 2) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
call get_command_argument (2, arg)
outp = open_for_output (trim (arg))
end if
 
call read_ast (inp, strbuf, ast, symtab, strtab)
call generate_and_output_code (outp, ast, symtab, strtab)
end if
 
contains
 
function open_for_input (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
 
integer :: stat
 
open (unit = inp_unit_no, file = filename, status = 'old', &
& action = 'read', access = 'stream', form = 'unformatted', &
& iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for input")') filename
stop 1
end if
unit_no = inp_unit_no
end function open_for_input
 
function open_for_output (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
 
integer :: stat
 
open (unit = outp_unit_no, file = filename, action = 'write', iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for output")') filename
stop 1
end if
unit_no = outp_unit_no
end function open_for_output
 
subroutine print_usage
character(200) :: progname
 
call get_command_argument (0, progname)
write (output_unit, '("Usage: ", 1A, " [INPUT_FILE [OUTPUT_FILE]]")') &
& trim (progname)
end subroutine print_usage
end program gen</lang>
 
{{out}}
$ ./lex compiler-tests/count.t | ./parse | ./gen
<pre>Datasize: 1 Strings: 2
"count is: "
"\n"
0 push 1
5 store [0]
10 fetch [0]
15 push 10
20 lt
21 jz (43) 65
26 push 0
31 prts
32 fetch [0]
37 prti
38 push 1
43 prts
44 fetch [0]
49 push 1
54 add
55 store [0]
60 jmp (-51) 10
65 halt</pre>
 
=={{header|Go}}==
1,448

edits