Compiler/code generator: Difference between revisions

← Older edit

Compiler/code generator (view source)

Revision as of 15:48, 20 November 2023

130,441 bytes added, 5 months ago

m

→‎{{header|Wren}}: Minor tidy

PureFox

9,476

edits

Revision as of 20:44, 29 January 2022 (view source) Rdm (talk \| contribs) (J) ← Older edit		Latest revision as of 15:48, 20 November 2023 (view source) PureFox (talk \| contribs) m (→‎{{header\|Wren}}: Minor tidy)
(10 intermediate revisions by 5 users not shown)
Line 4: into lower level code, either assembly, object, or virtual. =={{header\|Task}}== ~~{{task heading}}~~ Take the output of the Syntax analyzer [[Compiler/syntax_analyzer\|task]] - which is a [[Flatten_a_list\|flattened]] Abstract Syntax Tree (AST) - and convert it to virtual machine code, that can be run by the Line 34: \|- \| style="vertical-align:top" \| <~~lang~~syntaxhighlight lang="c">count = 1; while (count < 10) { print("count is: ", count, "\n"); count = count + 1; }</~~lang~~syntaxhighlight> \| style="vertical-align:top" \| Line 136: Loading this data into an internal parse tree should be as simple as: <~~lang~~syntaxhighlight lang="python"> def load_ast() line = readline() Line 158: right = load_ast() return make_node(node_type, left, right) </syntaxhighlight> ~~</lang>~~ ; Output format - refer to the table above Line 270: <br> As noted in the code, the generated IL is naive - the sample focuses on simplicity. <~~lang~~syntaxhighlight lang="algol68"># RC Compiler code generator # COMMENT this writes a .NET IL assembler source to standard output. Line 557: code header; gen( code ); code trailer</~~lang~~syntaxhighlight> {{out}} <pre> Line 601: =={{header\|ALGOL W}}== <~~lang~~syntaxhighlight lang="algolw">begin % code generator % % parse tree nodes % record node( integer type Line 966: genOp0( oHalt ); emitCode end.</~~lang~~syntaxhighlight> {{out}} The While Counter example Line 991: 60 jmp (-51) 10 65 halt </pre> =={{header\|ATS}}== For ATS2 with a garbage collector. <syntaxhighlight lang="ats"> (* The Rosetta Code code generator in ATS2. ) ( Usage: gen [INPUTFILE [OUTPUTFILE]] If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input or standard output is used, respectively. ) ( Note: you might wish to add code to catch exceptions and print nice messages. 
) (------------------------------------------------------------------) #define ATS_DYNLOADFLAG 0 #include "share/atspre_staload.hats" staload UN = "prelude/SATS/unsafe.sats" #define NIL list_vt_nil () #define :: list_vt_cons %{^ / alloca(3) is needed for ATS exceptions. / #include <alloca.h> %} exception internal_error of () exception bad_ast_node_type of string exception premature_end_of_input of () exception bad_number_field of string exception missing_identifier_field of () exception bad_quoted_string of string ( Some implementations that are likely missing from the prelude. ) implement g0uint2int<sizeknd, llintknd> x = $UN.cast x implement g0uint2uint<sizeknd, ullintknd> x = $UN.cast x implement g0uint2int<ullintknd, llintknd> x = $UN.cast x (------------------------------------------------------------------) extern fn {} skip_characters$skipworthy (c : char) :<> bool fn {} skip_characters {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) :<> [j : int \| i <= j; j <= n] size_t j = let fun loop {k : int \| i <= k; k <= n} .<n - k>. 
(k : size_t k) :<> [j : int \| k <= j; j <= n] size_t j = if string_is_atend (s, k) then k else if ~skip_characters$skipworthy (string_get_at (s, k)) then k else loop (succ k) in loop i end fn skip_whitespace {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) :<> [j : int \| i <= j; j <= n] size_t j = let implement skip_characters$skipworthy<> c = isspace c in skip_characters<> (s, i) end fn skip_nonwhitespace {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) :<> [j : int \| i <= j; j <= n] size_t j = let implement skip_characters$skipworthy<> c = ~isspace c in skip_characters<> (s, i) end fn skip_nonquote {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) :<> [j : int \| i <= j; j <= n] size_t j = let implement skip_characters$skipworthy<> c = c <> '"' in skip_characters<> (s, i) end fn skip_to_end {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) :<> [j : int \| i <= j; j <= n] size_t j = let implement skip_characters$skipworthy<> c = true in skip_characters<> (s, i) end (------------------------------------------------------------------) fn substring_equals {n : int} {i, j : nat \| i <= j; j <= n} (s : string n, i : size_t i, j : size_t j, t : string) :<> bool = let val m = strlen t in if j - i <> m then false ( The substring is the wrong length. 
) else let val p_s = ptrcast s and p_t = ptrcast t in 0 = $extfcall (int, "strncmp", ptr_add<char> (p_s, i), p_t, m) end end (------------------------------------------------------------------) datatype node_type_t = \| NullNode \| Identifier \| String \| Integer \| Sequence \| If \| Prtc \| Prts \| Prti \| While \| Assign \| Negate \| Not \| Multiply \| Divide \| Mod \| Add \| Subtract \| Less \| LessEqual \| Greater \| GreaterEqual \| Equal \| NotEqual \| And \| Or #define ARBITRARY_NODE_ARG 1234 datatype ast_node_t = \| ast_node_t_nil \| ast_node_t_nonnil of node_contents_t where node_contents_t = @{ node_type = node_type_t, node_arg = ullint, node_left = ast_node_t, node_right = ast_node_t } fn get_node_type {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) : [j : int \| i <= j; j <= n] @(node_type_t, size_t j) = let val i_start = skip_whitespace (s, i) val i_end = skip_nonwhitespace (s, i_start) macdef eq t = substring_equals (s, i_start, i_end, ,(t)) val node_type = if eq ";" then NullNode else if eq "Identifier" then Identifier else if eq "String" then String else if eq "Integer" then Integer else if eq "Sequence" then Sequence else if eq "If" then If else if eq "Prtc" then Prtc else if eq "Prts" then Prts else if eq "Prti" then Prti else if eq "While" then While else if eq "Assign" then Assign else if eq "Negate" then Negate else if eq "Not" then Not else if eq "Multiply" then Multiply else if eq "Divide" then Divide else if eq "Mod" then Mod else if eq "Add" then Add else if eq "Subtract" then Subtract else if eq "Less" then Less else if eq "LessEqual" then LessEqual else if eq "Greater" then Greater else if eq "GreaterEqual" then GreaterEqual else if eq "Equal" then Equal else if eq "NotEqual" then NotEqual else if eq "And" then And else if eq "Or" then Or else let val s_bad = strnptr2string (string_make_substring (s, i_start, i_end - i_start)) in $raise bad_ast_node_type s_bad end in @(node_type, i_end) end fn get_unsigned {n : int} {i : nat \| 
i <= n} (s : string n, i : size_t i) : [j : int \| i <= j; j <= n] @(ullint, size_t j) = let val i = skip_whitespace (s, i) val [j : int] j = skip_nonwhitespace (s, i) in if j = i then $raise bad_number_field "" else let fun loop {k : int \| i <= k; k <= j} (k : size_t k, v : ullint) : ullint = if k = j then v else let val c = string_get_at (s, k) in if ~isdigit c then let val s_bad = strnptr2string (string_make_substring (s, i, j - i)) in $raise bad_number_field s_bad end else let val digit = char2int1 c - char2int1 '0' val () = assertloc (0 <= digit) in loop (succ k, (g1i2u 10 v) + g1i2u digit) end end in @(loop (i, g0i2u 0), j) end end fn get_identifier {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) : [j : int \| i <= j; j <= n] @(string, size_t j) = let val i = skip_whitespace (s, i) val j = skip_nonwhitespace (s, i) in if i = j then $raise missing_identifier_field () else let val ident = strnptr2string (string_make_substring (s, i, j - i)) in @(ident, j) end end fn get_quoted_string {n : int} {i : nat \| i <= n} (s : string n, i : size_t i) : [j : int \| i <= j; j <= n] @(string, size_t j) = let val i = skip_whitespace (s, i) in if string_is_atend (s, i) then $raise bad_quoted_string "" else if string_get_at (s, i) <> '"' then let val j = skip_to_end (s, i) val s_bad = strnptr2string (string_make_substring (s, i, j - i)) in $raise bad_quoted_string s_bad end else let val j = skip_nonquote (s, succ i) in if string_is_atend (s, j) then let val s_bad = strnptr2string (string_make_substring (s, i, j - i)) in $raise bad_quoted_string s_bad end else let val quoted_string = strnptr2string (string_make_substring (s, i, succ j - i)) in @(quoted_string, succ j) end end end fn collect_string {n : int} (str : string, strings : &list_vt (string, n) >> list_vt (string, m)) : #[m : int \| m == n \|\| m == n + 1] [str_num : nat \| str_num <= m] size_t str_num = (* This implementation uses ‘list_vt’ instead of ‘list’, so appending elements to the end of the list 
will be both efficient and safe. It would also have been reasonable to build a ‘list’ backwards and then make a reversed copy. ) let fun find_or_extend {i : nat \| i <= n} .<n - i>. (strings1 : &list_vt (string, n - i) >> list_vt (string, m), i : size_t i) : #[m : int \| m == n - i \|\| m == n - i + 1] [j : nat \| j <= n] size_t j = case+ strings1 of \| ~ NIL => let ( The string is not there. Extend the list. ) prval () = prop_verify {i == n} () in strings1 := (str :: NIL); i end \| @ (head :: tail) => if head = str then let ( The string is found. ) prval () = fold@ strings1 in i end else let ( Continue looking. ) val j = find_or_extend (tail, succ i) prval () = fold@ strings1 in j end prval () = lemma_list_vt_param strings val n = i2sz (length strings) and j = find_or_extend (strings, i2sz 0) in j end fn load_ast (inpf : FILEref, idents : &List_vt string >> _, strings : &List_vt string >> _) : ast_node_t = let fun recurs (idents : &List_vt string >> _, strings : &List_vt string >> _) : ast_node_t = if fileref_is_eof inpf then $raise premature_end_of_input () else let val s = strptr2string (fileref_get_line_string inpf) prval () = lemma_string_param s ( String length >= 0. 
) val i = i2sz 0 val @(node_type, i) = get_node_type (s, i) in case+ node_type of \| NullNode () => ast_node_t_nil () \| Integer () => let val @(number, _) = get_unsigned (s, i) in ast_node_t_nonnil @{ node_type = node_type, node_arg = number, node_left = ast_node_t_nil, node_right = ast_node_t_nil } end \| Identifier () => let val @(ident, _) = get_identifier (s, i) val arg = collect_string (ident, idents) in ast_node_t_nonnil @{ node_type = node_type, node_arg = g0u2u arg, node_left = ast_node_t_nil, node_right = ast_node_t_nil } end \| String () => let val @(quoted_string, _) = get_quoted_string (s, i) val arg = collect_string (quoted_string, strings) in ast_node_t_nonnil @{ node_type = node_type, node_arg = g0u2u arg, node_left = ast_node_t_nil, node_right = ast_node_t_nil } end \| _ => let val node_left = recurs (idents, strings) val node_right = recurs (idents, strings) in ast_node_t_nonnil @{ node_type = node_type, node_arg = g1i2u ARBITRARY_NODE_ARG, node_left = node_left, node_right = node_right } end end in recurs (idents, strings) end fn print_strings {n : int} (outf : FILEref, strings : !list_vt (string, n)) : void = let fun loop {m : nat} .<m>. (strings1 : !list_vt (string, m)) : void = case+ strings1 of \| NIL => () \| head :: tail => begin fprintln! 
(outf, head); loop tail end prval () = lemma_list_vt_param strings in loop strings end (------------------------------------------------------------------) #define ARBITRARY_INSTRUCTION_ARG 1234 #define ARBITRARY_JUMP_ARG 123456789 typedef instruction_t = @{ address = ullint, opcode = string, arg = llint } typedef code_t = ref instruction_t vtypedef pjump_t (p : addr) = (instruction_t @ p, instruction_t @ p -<lin,prf> void \| ptr p) vtypedef pjump_t = [p : addr] pjump_t p fn add_instruction (opcode : string, arg : llint, size : uint, code : &List0_vt code_t >> List1_vt code_t, pc : &ullint >> _) : void = let val instr = @{ address = pc, opcode = opcode, arg = arg } in code := (ref instr :: code); pc := pc + g0u2u size end fn add_jump (opcode : string, code : &List0_vt code_t >> List1_vt code_t, pc : &ullint >> _) : pjump_t = let val instr = @{ address = pc, opcode = opcode, arg = g1i2i ARBITRARY_JUMP_ARG } val ref_instr = ref instr in code := (ref_instr :: code); pc := pc + g0u2u 5U; ref_vtakeout ref_instr end fn fill_jump (pjump : pjump_t, address : ullint) : void = let val @(pf, fpf \| p) = pjump val instr0 = !p val jump_offset : llint = let val from = succ (instr0.address) and to = address in if from <= to then g0u2i (to - from) else ~g0u2i (from - to) end val instr1 = @{ address = instr0.address, opcode = instr0.opcode, arg = jump_offset } val () = !p := instr1 prval () = fpf pf in end fn add_filled_jump (opcode : string, address : ullint, code : &List0_vt code_t >> List1_vt code_t, pc : &ullint >> _) : void = let val pjump = add_jump (opcode, code, pc) in fill_jump (pjump, address) end fn generate_code (ast : ast_node_t) : List_vt code_t = let fnx traverse (ast : ast_node_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = ( Generate the code by consing a list. 
) case+ ast of \| ast_node_t_nil () => () \| ast_node_t_nonnil contents => begin case+ contents.node_type of \| NullNode () => $raise internal_error () \| If () => if_then (contents, code, pc) \| While () => while_do (contents, code, pc) \| Sequence () => sequence (contents, code, pc) \| Assign () => assign (contents, code, pc) \| Identifier () => immediate ("fetch", contents, code, pc) \| Integer () => immediate ("push", contents, code, pc) \| String () => immediate ("push", contents, code, pc) \| Prtc () => unary_op ("prtc", contents, code, pc) \| Prti () => unary_op ("prti", contents, code, pc) \| Prts () => unary_op ("prts", contents, code, pc) \| Negate () => unary_op ("neg", contents, code, pc) \| Not () => unary_op ("not", contents, code, pc) \| Multiply () => binary_op ("mul", contents, code, pc) \| Divide () => binary_op ("div", contents, code, pc) \| Mod () => binary_op ("mod", contents, code, pc) \| Add () => binary_op ("add", contents, code, pc) \| Subtract () => binary_op ("sub", contents, code, pc) \| Less () => binary_op ("lt", contents, code, pc) \| LessEqual () => binary_op ("le", contents, code, pc) \| Greater () => binary_op ("gt", contents, code, pc) \| GreaterEqual () => binary_op ("ge", contents, code, pc) \| Equal () => binary_op ("eq", contents, code, pc) \| NotEqual () => binary_op ("ne", contents, code, pc) \| And () => binary_op ("and", contents, code, pc) \| Or () => binary_op ("or", contents, code, pc) end and if_then (contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = case- (contents.node_right) of \| ast_node_t_nonnil contents1 => let val condition = (contents.node_left) and true_branch = (contents1.node_left) and false_branch = (contents1.node_right) ( Generate code to evaluate the condition. ) val () = traverse (condition, code, pc); ( Generate a conditional jump. Where it goes to will be filled in later. ) val pjump = add_jump ("jz", code, pc) ( Generate code for the true branch. 
) val () = traverse (true_branch, code, pc); in case+ false_branch of \| ast_node_t_nil () => begin ( There is no false branch. ) ( Fill in the conditional jump to come here. ) fill_jump (pjump, pc) end \| ast_node_t_nonnil _ => let ( There is a false branch. ) ( Generate an unconditional jump. Where it goes to will be filled in later. ) val pjump1 = add_jump ("jmp", code, pc) ( Fill in the conditional jump to come here. ) val () = fill_jump (pjump, pc) ( Generate code for the false branch. ) val () = traverse (false_branch, code, pc); ( Fill in the unconditional jump to come here. ) val () = fill_jump (pjump1, pc) in end end and while_do (contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = ( I would prefer to implement ‘while’ by putting the conditional jump at the end, and jumping to it to get into the loop. However, we need to generate not the code of our choice, but the reference result. The reference result has the conditional jump at the top. ) let ( Where to jump from the bottom of the loop. ) val loop_top_address = pc ( Generate code to evaluate the condition. ) val () = traverse (contents.node_left, code, pc) ( Generate a conditional jump. It will be filled in later to go past the end of the loop. ) val pjump = add_jump ("jz", code, pc) ( Generate code for the loop body. ) val () = traverse (contents.node_right, code, pc) ( Generate a jump to the top of the loop. ) val () = add_filled_jump ("jmp", loop_top_address, code, pc) ( Fill in the conditional jump to come here. 
) val () = fill_jump (pjump, pc) in end and sequence (contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = begin traverse (contents.node_left, code, pc); traverse (contents.node_right, code, pc) end and assign (contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = case- contents.node_left of \| ast_node_t_nonnil ident_contents => let val variable_no = ident_contents.node_arg in traverse (contents.node_right, code, pc); add_instruction ("store", g0u2i variable_no, 5U, code, pc) end and immediate (opcode : string, contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = add_instruction (opcode, g0u2i (contents.node_arg), 5U, code, pc) and unary_op (opcode : string, contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = begin traverse (contents.node_left, code, pc); add_instruction (opcode, g0i2i ARBITRARY_INSTRUCTION_ARG, 1U, code, pc) end and binary_op (opcode : string, contents : node_contents_t, code : &List0_vt code_t >> _, pc : &ullint >> _) : void = begin traverse (contents.node_left, code, pc); traverse (contents.node_right, code, pc); add_instruction (opcode, g0i2i ARBITRARY_INSTRUCTION_ARG, 1U, code, pc) end var code : List_vt code_t = NIL var pc : ullint = g0i2u 0 in traverse (ast, code, pc); add_instruction ("halt", g0i2i ARBITRARY_INSTRUCTION_ARG, 1U, code, pc); ( The code is a cons-list, in decreasing-address order, so reverse it to put the instructions in increasing-address order. ) list_vt_reverse code end fn print_code (outf : FILEref, code : !List_vt code_t) : void = let fun loop {n : nat} .<n>. (code : !list_vt (code_t, n)) : void = case+ code of \| NIL => () \| ref_instr :: tail => let val @{ address = address, opcode = opcode, arg = arg } = !ref_instr in fprint! (outf, address, " "); fprint! (outf, opcode); if opcode = "push" then fprint! (outf, " ", arg) else if opcode = "fetch" \|\| opcode = "store" then fprint! 
(outf, " [", arg, "]") else if opcode = "jmp" \|\| opcode = "jz" then begin fprint! (outf, " (", arg, ") "); if arg < g1i2i 0 then let val offset : ullint = g0i2u (~arg) val () = assertloc (offset <= succ address) in fprint! (outf, succ address - offset) end else let val offset : ullint = g0i2u arg in fprint! (outf, succ address + offset) end end; fprintln! (outf); loop tail end prval () = lemma_list_vt_param code in loop code end (------------------------------------------------------------------) fn main_program (inpf : FILEref, outf : FILEref) : int = let var idents : List_vt string = NIL var strings : List_vt string = NIL val ast = load_ast (inpf, idents, strings) val code = generate_code ast val () = fprintln! (outf, "Datasize: ", length idents, " Strings: ", length strings) val () = print_strings (outf, strings) val () = print_code (outf, code) val () = free idents and () = free strings and () = free code in 0 end implement main (argc, argv) = let val inpfname = if 2 <= argc then $UN.cast{string} argv[1] else "-" val outfname = if 3 <= argc then $UN.cast{string} argv[2] else "-" val inpf = if (inpfname : string) = "-" then stdin_ref else fileref_open_exn (inpfname, file_mode_r) val outf = if (outfname : string) = "-" then stdout_ref else fileref_open_exn (outfname, file_mode_w) in main_program (inpf, outf) end (------------------------------------------------------------------) </syntaxhighlight> {{out\|case=count}} <pre>$ patscc -o gen -O3 -DATS_MEMALLOC_GCBDW gen-in-ATS.dats -latslib -lgc && ./gen < count.ast Datasize: 1 Strings: 2 "count is: " "\n" 0 push 1 5 store [0] 10 fetch [0] 15 push 10 20 lt 21 jz (43) 65 26 push 0 31 prts 32 fetch [0] 37 prti 38 push 1 43 prts 44 fetch [0] 49 push 1 54 add 55 store [0] 60 jmp (-51) 10 65 halt </pre> =={{header\|AWK}}== Tested with gawk 4.1.1 and mawk 1.3.4. 
<syntaxhighlight lang="awk"> ~~<lang AWK>~~ function error(msg) { printf("%s\n", msg) Line 1,276 ⟶ 2,250: list_code() } </syntaxhighlight> ~~</lang>~~ {{out\|case=count}} <b> Line 1,304 ⟶ 2,278: =={{header\|C}}== Tested with gcc 4.81 and later, compiles warning free with -Wall -Wextra <~~lang~~syntaxhighlight Clang="c">#include <stdlib.h> #include <stdio.h> #include <string.h> Line 1,677 ⟶ 2,651: return 0; }</~~lang~~syntaxhighlight> {{out\|case=While counter example}} Line 1,707 ⟶ 2,681: Code by Steve Williams. Tested with GnuCOBOL 2.2. <~~lang~~syntaxhighlight lang="cobol"> >>SOURCE FORMAT IS FREE identification division. > this code is dedicated to the public domain Line 2,358 ⟶ 3,332: . end program showhex. end program generator.</~~lang~~syntaxhighlight> {{out\|case=Count}} Line 2,386 ⟶ 3,360: =={{header\|Forth}}== Tested with Gforth 0.7.3 <~~lang~~syntaxhighlight ~~Forth~~lang="forth">CREATE BUF 0 , : PEEK BUF @ 0= IF KEY BUF ! THEN BUF @ ; : GETC PEEK 0 BUF ! ; Line 2,514 ⟶ 3,488: DUP 5 < IF CELLS .INT + @ EXECUTE ELSE DROP THEN CR REPEAT DROP R> DROP ; GENERATE EMIT BYE</~~lang~~syntaxhighlight> Passes all tests. =={{header\|Fortran}}== {{works with\|gfortran\|11.2.1}} Fortran 2008/2018 code with C preprocessing. On case-sensitive systems, if you call the source file gen.F90, with a capital F, then gfortran will know to use the C preprocessor. <syntaxhighlight lang="fortran">module compiler_type_kinds use, intrinsic :: iso_fortran_env, only: int32 use, intrinsic :: iso_fortran_env, only: int64 implicit none private ! Synonyms. integer, parameter, public :: size_kind = int64 integer, parameter, public :: length_kind = size_kind integer, parameter, public :: nk = size_kind ! Synonyms for character capable of storing a Unicode code point. integer, parameter, public :: unicode_char_kind = selected_char_kind ('ISO_10646') integer, parameter, public :: ck = unicode_char_kind ! Synonyms for integers capable of storing a Unicode code point. 
integer, parameter, public :: unicode_ichar_kind = int32 integer, parameter, public :: ick = unicode_ichar_kind ! Synonyms for integers in the virtual machine or the interpreter’s ! runtime. (The Rosetta Code task says integers in the virtual ! machine are 32-bit, but there is nothing in the task that prevents ! us using 64-bit integers in the compiler and interpreter.) integer, parameter, public :: runtime_int_kind = int64 integer, parameter, public :: rik = runtime_int_kind end module compiler_type_kinds module helper_procedures use, non_intrinsic :: compiler_type_kinds, only: nk, rik, ck implicit none private public :: new_storage_size public :: next_power_of_two public :: isspace public :: quoted_string public :: int32_to_vm_bytes public :: uint32_to_vm_bytes public :: int32_from_vm_bytes public :: uint32_from_vm_bytes character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck) character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck) character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck) character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck) character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck) character(1, kind = ck), parameter :: space_char = ck_' ' ! The following is correct for Unix and its relatives. character(1, kind = ck), parameter :: newline_char = linefeed_char character(1, kind = ck), parameter :: backslash_char = char (92, kind = ck) contains elemental function new_storage_size (length_needed) result (size) integer(kind = nk), intent(in) :: length_needed integer(kind = nk) :: size ! Increase storage by orders of magnitude. if (2_nk*32 < length_needed) then size = huge (1_nk) else size = next_power_of_two (length_needed) end if end function new_storage_size elemental function next_power_of_two (x) result (y) integer(kind = nk), intent(in) :: x integer(kind = nk) :: y ! ! It is assumed that no more than 64 bits are used. ! ! 
The branch-free algorithm is that of ! https://archive.is/nKxAc#RoundUpPowerOf2 ! ! Fill in bits until one less than the desired power of two is ! reached, and then add one. ! y = x - 1 y = ior (y, ishft (y, -1)) y = ior (y, ishft (y, -2)) y = ior (y, ishft (y, -4)) y = ior (y, ishft (y, -8)) y = ior (y, ishft (y, -16)) y = ior (y, ishft (y, -32)) y = y + 1 end function next_power_of_two elemental function isspace (ch) result (bool) character(1, kind = ck), intent(in) :: ch logical :: bool bool = (ch == horizontal_tab_char) .or. & & (ch == linefeed_char) .or. & & (ch == vertical_tab_char) .or. & & (ch == formfeed_char) .or. & & (ch == carriage_return_char) .or. & & (ch == space_char) end function isspace function quoted_string (str) result (qstr) character(, kind = ck), intent(in) :: str character(:, kind = ck), allocatable :: qstr integer(kind = nk) :: n, i, j ! Compute n = the size of qstr. n = 2_nk do i = 1_nk, len (str, kind = nk) select case (str(i:i)) case (newline_char, backslash_char) n = n + 2 case default n = n + 1 end select end do allocate (character(n, kind = ck) :: qstr) ! Quote the string. qstr(1:1) = ck_'"' j = 2_nk do i = 1_nk, len (str, kind = nk) select case (str(i:i)) case (newline_char) qstr(j:j) = backslash_char qstr((j + 1):(j + 1)) = ck_'n' j = j + 2 case (backslash_char) qstr(j:j) = backslash_char qstr((j + 1):(j + 1)) = backslash_char j = j + 2 case default qstr(j:j) = str(i:i) j = j + 1 end select end do if (j /= n) error stop ! Check code correctness. qstr(n:n) = ck_'"' end function quoted_string subroutine int32_to_vm_bytes (n, bytes, i) integer(kind = rik), intent(in) :: n character(1), intent(inout) :: bytes(0:) integer(kind = rik), intent(in) :: i ! ! The virtual machine is presumed to be little-endian. Because I ! slightly prefer little-endian. ! 
bytes(i) = achar (ibits (n, 0, 8)) bytes(i + 1) = achar (ibits (n, 8, 8)) bytes(i + 2) = achar (ibits (n, 16, 8)) bytes(i + 3) = achar (ibits (n, 24, 8)) end subroutine int32_to_vm_bytes subroutine uint32_to_vm_bytes (n, bytes, i) integer(kind = rik), intent(in) :: n character(1), intent(inout) :: bytes(0:) integer(kind = rik), intent(in) :: i call int32_to_vm_bytes (n, bytes, i) end subroutine uint32_to_vm_bytes subroutine int32_from_vm_bytes (n, bytes, i) integer(kind = rik), intent(out) :: n character(1), intent(in) :: bytes(0:) integer(kind = rik), intent(in) :: i ! ! The virtual machine is presumed to be little-endian. Because I ! slightly prefer little-endian. ! call uint32_from_vm_bytes (n, bytes, i) if (ibits (n, 31, 1) == 1) then ! Extend the sign bit. n = ior (n, not ((2_rik * 32) - 1)) end if end subroutine int32_from_vm_bytes subroutine uint32_from_vm_bytes (n, bytes, i) integer(kind = rik), intent(out) :: n character(1), intent(in) :: bytes(0:) integer(kind = rik), intent(in) :: i ! ! The virtual machine is presumed to be little-endian. Because I ! slightly prefer little-endian. ! integer(kind = rik) :: n0, n1, n2, n3 n0 = iachar (bytes(i), kind = rik) n1 = ishft (iachar (bytes(i + 1), kind = rik), 8) n2 = ishft (iachar (bytes(i + 2), kind = rik), 16) n3 = ishft (iachar (bytes(i + 3), kind = rik), 24) n = ior (n0, ior (n1, ior (n2, n3))) end subroutine uint32_from_vm_bytes end module helper_procedures module string_buffers use, intrinsic :: iso_fortran_env, only: error_unit use, intrinsic :: iso_fortran_env, only: int64 use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick use, non_intrinsic :: helper_procedures implicit none private public :: strbuf_t public :: skip_whitespace public :: skip_non_whitespace public :: skip_whitespace_backwards public :: at_end_of_line type :: strbuf_t integer(kind = nk), private :: len = 0 ! ! ‘chars’ is made public for efficient access to the individual ! characters. ! 
character(1, kind = ck), allocatable, public :: chars(:) contains procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage procedure, pass :: to_unicode_full_string => strbuf_t_to_unicode_full_string procedure, pass :: to_unicode_substring => strbuf_t_to_unicode_substring procedure, pass :: length => strbuf_t_length procedure, pass :: set => strbuf_t_set procedure, pass :: append => strbuf_t_append generic :: to_unicode => to_unicode_full_string generic :: to_unicode => to_unicode_substring generic :: assignment(=) => set end type strbuf_t contains function strbuf_t_to_unicode_full_string (strbuf) result (s) class(strbuf_t), intent(in) :: strbuf character(:, kind = ck), allocatable :: s ! ! This does not actually ensure that the string is valid Unicode; ! any 31-bit ‘character’ is supported. ! integer(kind = nk) :: i allocate (character(len = strbuf%len, kind = ck) :: s) do i = 1, strbuf%len s(i:i) = strbuf%chars(i) end do end function strbuf_t_to_unicode_full_string function strbuf_t_to_unicode_substring (strbuf, i, j) result (s) ! ! ‘Extreme’ values of i and j are allowed, as shortcuts for ‘from ! the beginning’, ‘up to the end’, or ‘empty substring’. ! class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i, j character(:, kind = ck), allocatable :: s ! ! This does not actually ensure that the string is valid Unicode; ! any 31-bit ‘character’ is supported. ! 
integer(kind = nk) :: i1, j1 integer(kind = nk) :: n integer(kind = nk) :: k i1 = max (1_nk, i) j1 = min (strbuf%len, j) n = max (0_nk, (j1 - i1) + 1_nk) allocate (character(n, kind = ck) :: s) do k = 1, n s(k:k) = strbuf%chars(i1 + (k - 1_nk)) end do end function strbuf_t_to_unicode_substring elemental function strbuf_t_length (strbuf) result (n) class(strbuf_t), intent(in) :: strbuf integer(kind = nk) :: n n = strbuf%len end function strbuf_t_length subroutine strbuf_t_ensure_storage (strbuf, length_needed) class(strbuf_t), intent(inout) :: strbuf integer(kind = nk), intent(in) :: length_needed integer(kind = nk) :: len_needed integer(kind = nk) :: new_size type(strbuf_t) :: new_strbuf len_needed = max (length_needed, 1_nk) if (.not. allocated (strbuf%chars)) then ! Initialize a new strbuf%chars array. new_size = new_storage_size (len_needed) allocate (strbuf%chars(1:new_size)) else if (ubound (strbuf%chars, 1) < len_needed) then ! Allocate a new strbuf%chars array, larger than the current ! one, but containing the same characters. 
new_size = new_storage_size (len_needed) allocate (new_strbuf%chars(1:new_size)) new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len) call move_alloc (new_strbuf%chars, strbuf%chars) end if end subroutine strbuf_t_ensure_storage subroutine strbuf_t_set (dst, src) class(strbuf_t), intent(inout) :: dst class(), intent(in) :: src integer(kind = nk) :: n integer(kind = nk) :: i select type (src) type is (character(, kind = ck)) n = len (src, kind = nk) call dst%ensure_storage(n) do i = 1, n dst%chars(i) = src(i:i) end do dst%len = n type is (character()) n = len (src, kind = nk) call dst%ensure_storage(n) do i = 1, n dst%chars(i) = src(i:i) end do dst%len = n class is (strbuf_t) n = src%len call dst%ensure_storage(n) dst%chars(1:n) = src%chars(1:n) dst%len = n class default error stop end select end subroutine strbuf_t_set subroutine strbuf_t_append (dst, src) class(strbuf_t), intent(inout) :: dst class(), intent(in) :: src integer(kind = nk) :: n_dst, n_src, n integer(kind = nk) :: i select type (src) type is (character(, kind = ck)) n_dst = dst%len n_src = len (src, kind = nk) n = n_dst + n_src call dst%ensure_storage(n) do i = 1, n_src dst%chars(n_dst + i) = src(i:i) end do dst%len = n type is (character()) n_dst = dst%len n_src = len (src, kind = nk) n = n_dst + n_src call dst%ensure_storage(n) do i = 1, n_src dst%chars(n_dst + i) = src(i:i) end do dst%len = n class is (strbuf_t) n_dst = dst%len n_src = src%len n = n_dst + n_src call dst%ensure_storage(n) dst%chars((n_dst + 1):n) = src%chars(1:n_src) dst%len = n class default error stop end select end subroutine strbuf_t_append function skip_whitespace (strbuf, i) result (j) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i integer(kind = nk) :: j logical :: done j = i done = .false. do while (.not. done) if (at_end_of_line (strbuf, j)) then done = .true. else if (.not. isspace (strbuf%chars(j))) then done = .true. 
else
          j = j + 1
       end if
    end do
  end function skip_whitespace

  function skip_non_whitespace (strbuf, i) result (j)
    !
    ! Starting at position i, return the position of the first
    ! whitespace character, or the first position past the end of
    ! the buffer.
    !
    class(strbuf_t), intent(in) :: strbuf
    integer(kind = nk), intent(in) :: i
    integer(kind = nk) :: j

    logical :: done

    j = i
    done = .false.
    do while (.not. done)
       if (at_end_of_line (strbuf, j)) then
          done = .true.
       else if (isspace (strbuf%chars(j))) then
          done = .true.
       else
          j = j + 1
       end if
    end do
  end function skip_non_whitespace

  function skip_whitespace_backwards (strbuf, i) result (j)
    !
    ! Starting at position i and moving towards the start of the
    ! buffer, return the position of the first character that is not
    ! whitespace. If there is no such character, return 0.
    !
    class(strbuf_t), intent(in) :: strbuf
    integer(kind = nk), intent(in) :: i
    integer(kind = nk) :: j

    logical :: done

    j = i
    done = .false.
    do while (.not. done)
       ! The characters are indexed starting at 1, so the scan must
       ! stop before strbuf%chars(0) is examined.
       if (j < 1) then
          done = .true.
       else if (.not. isspace (strbuf%chars(j))) then
          done = .true.
       else
          j = j - 1
       end if
    end do
  end function skip_whitespace_backwards

  function at_end_of_line (strbuf, i) result (bool)
    !
    ! Is position i past the last character stored in strbuf?
    !
    class(strbuf_t), intent(in) :: strbuf
    integer(kind = nk), intent(in) :: i
    logical :: bool

    bool = (strbuf%length() < i)
  end function at_end_of_line

end module string_buffers

module reading_one_line_from_a_stream
  use, intrinsic :: iso_fortran_env, only: input_unit
  use, intrinsic :: iso_fortran_env, only: error_unit
  use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
  use, non_intrinsic :: string_buffers

  implicit none
  private

  ! get_line_from_stream: read an entire input line from a stream into
  ! a strbuf_t.
  public :: get_line_from_stream

  character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)

  ! The following is correct for Unix and its relatives.
  character(1, kind = ck), parameter :: newline_char = linefeed_char

contains

  subroutine get_line_from_stream (unit_no, eof, no_newline, strbuf)
    integer, intent(in) :: unit_no
    logical, intent(out) :: eof         ! End of file?
    logical, intent(out) :: no_newline  ! There is a line but it has no
                                        ! newline? (Thus eof also must
                                        ! be .true.)
    class(strbuf_t), intent(inout) :: strbuf

    character(1, kind = ck) :: ch

    strbuf = ''
    call get_ch (unit_no, eof, ch)
    do while (.not. &
eof .and. ch /= newline_char) call strbuf%append (ch) call get_ch (unit_no, eof, ch) end do no_newline = eof .and. (strbuf%length() /= 0) end subroutine get_line_from_stream subroutine get_ch (unit_no, eof, ch) ! ! Read a single code point from the stream. ! ! Currently this procedure simply inputs ‘ASCII’ bytes rather than ! Unicode code points. ! integer, intent(in) :: unit_no logical, intent(out) :: eof character(1, kind = ck), intent(out) :: ch integer :: stat character(1) :: c = '' eof = .false. if (unit_no == input_unit) then call get_input_unit_char (c, stat) else read (unit = unit_no, iostat = stat) c end if if (stat < 0) then ch = ck_'' eof = .true. else if (0 < stat) then write (error_unit, '("Input error with status code ", I0)') stat stop 1 else ch = char (ichar (c, kind = ick), kind = ck) end if end subroutine get_ch !!! !!! If you tell gfortran you want -std=f2008 or -std=f2018, you likely !!! will need to add also -fall-intrinsics or -U__GFORTRAN__ !!! !!! The first way, you get the FGETC intrinsic. The latter way, you !!! get the C interface code that uses getchar(3). !!! #ifdef __GFORTRAN__ subroutine get_input_unit_char (c, stat) ! ! The following works if you are using gfortran. ! ! (FGETC is considered a feature for backwards compatibility with ! g77. However, I know of no way to reconfigure input_unit as a ! Fortran 2003 stream, for use with ordinary ‘read’.) ! character, intent(inout) :: c integer, intent(out) :: stat call fgetc (input_unit, c, stat) end subroutine get_input_unit_char #else subroutine get_input_unit_char (c, stat) ! ! An alternative implementation of get_input_unit_char. This ! actually reads input from the C standard input, which might not ! be the same as input_unit. ! use, intrinsic :: iso_c_binding, only: c_int character, intent(inout) :: c integer, intent(out) :: stat interface ! ! Use getchar(3) to read characters from standard input. This ! assumes there is actually such a function available, and that ! 
getchar(3) does not exist solely as a macro. (One could write ! one’s own getchar() if necessary, of course.) ! function getchar () result (c) bind (c, name = 'getchar') use, intrinsic :: iso_c_binding, only: c_int integer(kind = c_int) :: c end function getchar end interface integer(kind = c_int) :: i_char i_char = getchar () ! ! The C standard requires that EOF have a negative value. If the ! value returned by getchar(3) is not EOF, then it will be ! representable as an unsigned char. Therefore, to check for end ! of file, one need only test whether i_char is negative. ! if (i_char < 0) then stat = -1 else stat = 0 c = char (i_char) end if end subroutine get_input_unit_char #endif end module reading_one_line_from_a_stream module ast_reader ! ! The AST will be read into an array. Perhaps that will improve ! locality, compared to storing the AST as many linked heap nodes. ! ! In any case, implementing the AST this way is an interesting ! problem. ! use, intrinsic :: iso_fortran_env, only: input_unit use, intrinsic :: iso_fortran_env, only: output_unit use, intrinsic :: iso_fortran_env, only: error_unit use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick, rik use, non_intrinsic :: helper_procedures, only: next_power_of_two use, non_intrinsic :: helper_procedures, only: new_storage_size use, non_intrinsic :: string_buffers use, non_intrinsic :: reading_one_line_from_a_stream implicit none private public :: string_table_t public :: ast_node_t public :: ast_t public :: read_ast integer, parameter, public :: node_Nil = 0 integer, parameter, public :: node_Identifier = 1 integer, parameter, public :: node_String = 2 integer, parameter, public :: node_Integer = 3 integer, parameter, public :: node_Sequence = 4 integer, parameter, public :: node_If = 5 integer, parameter, public :: node_Prtc = 6 integer, parameter, public :: node_Prts = 7 integer, parameter, public :: node_Prti = 8 integer, parameter, public :: node_While = 9 integer, parameter, public :: 
node_Assign = 10 integer, parameter, public :: node_Negate = 11 integer, parameter, public :: node_Not = 12 integer, parameter, public :: node_Multiply = 13 integer, parameter, public :: node_Divide = 14 integer, parameter, public :: node_Mod = 15 integer, parameter, public :: node_Add = 16 integer, parameter, public :: node_Subtract = 17 integer, parameter, public :: node_Less = 18 integer, parameter, public :: node_LessEqual = 19 integer, parameter, public :: node_Greater = 20 integer, parameter, public :: node_GreaterEqual = 21 integer, parameter, public :: node_Equal = 22 integer, parameter, public :: node_NotEqual = 23 integer, parameter, public :: node_And = 24 integer, parameter, public :: node_Or = 25 type :: string_table_element_t character(:, kind = ck), allocatable :: str end type string_table_element_t type :: string_table_t integer(kind = nk), private :: len = 0_nk type(string_table_element_t), allocatable, private :: strings(:) contains procedure, pass, private :: ensure_storage => string_table_t_ensure_storage procedure, pass :: look_up_index => string_table_t_look_up_index procedure, pass :: look_up_string => string_table_t_look_up_string procedure, pass :: length => string_table_t_length generic :: look_up => look_up_index generic :: look_up => look_up_string end type string_table_t type :: ast_node_t integer :: node_variety ! Runtime integer, symbol index, or string index. integer(kind = rik) :: int ! The left branch begins at the next node. The right branch ! begins at the address of the left branch, plus the following. 
integer(kind = nk) :: right_branch_offset
  end type ast_node_t

  type :: ast_t
     integer(kind = nk), private :: len = 0_nk
     type(ast_node_t), allocatable, public :: nodes(:)
   contains
     procedure, pass, private :: ensure_storage => ast_t_ensure_storage
  end type ast_t

contains

  subroutine string_table_t_ensure_storage (table, length_needed)
    !
    ! Make sure table%strings is allocated and can hold at least
    ! length_needed entries (at least one). Entries already stored,
    ! 1 through table%len, are kept when the array is enlarged.
    !
    class(string_table_t), intent(inout) :: table
    integer(kind = nk), intent(in) :: length_needed

    integer(kind = nk) :: len_needed
    integer(kind = nk) :: new_size
    type(string_table_t) :: new_table

    len_needed = max (length_needed, 1_nk)

    if (.not. allocated (table%strings)) then
       ! Initialize a new table%strings array.
       new_size = new_storage_size (len_needed)
       allocate (table%strings(1:new_size))
    else if (ubound (table%strings, 1) < len_needed) then
       ! Allocate a new table%strings array, larger than the current
       ! one, but containing the same strings.
       new_size = new_storage_size (len_needed)
       allocate (new_table%strings(1:new_size))
       new_table%strings(1:table%len) = table%strings(1:table%len)
       call move_alloc (new_table%strings, table%strings)
    end if
  end subroutine string_table_t_ensure_storage

  elemental function string_table_t_length (table) result (len)
    !
    ! The number of strings currently stored in the table.
    !
    class(string_table_t), intent(in) :: table
    integer(kind = nk) :: len

    len = table%len
  end function string_table_t_length

  function string_table_t_look_up_index (table, str) result (index)
    !
    ! Return the 1-based index of str in the table. If str is not
    ! yet in the table, it is appended and its new index is returned.
    !
    class(string_table_t), intent(inout) :: table
    character(*, kind = ck), intent(in) :: str
    integer(kind = rik) :: index

    !
    ! This implementation simply stores the strings sequentially into
    ! an array. Obviously, for large numbers of strings, one might
    ! wish to do something more complex.
    !
    ! Standard Fortran does not come, out of the box, with a massive
    ! runtime library for doing such things. They are, however, no
    ! longer nearly as challenging to implement in Fortran as they
    ! used to be.
    !

    integer(kind = nk) :: i

    i = 1
    index = 0
    do while (index == 0)
       if (i == table%len + 1) then
          !
The string is new and must be added to the table. i = table%len + 1 if (huge (1_rik) < i) then ! String indices are assumed to be storable as runtime ! integers. write (error_unit, '("string_table_t capacity exceeded")') stop 1 end if call table%ensure_storage(i) table%len = i allocate (table%strings(i)%str, source = str) index = int (i, kind = rik) else if (table%strings(i)%str == str) then index = int (i, kind = rik) else i = i + 1 end if end do end function string_table_t_look_up_index function string_table_t_look_up_string (table, index) result (str) class(string_table_t), intent(inout) :: table integer(kind = rik), intent(in) :: index character(:, kind = ck), allocatable :: str ! ! This is the reverse of string_table_t_look_up_index: given an ! index, find the string. ! if (index < 1 .or. table%len < index) then ! In correct code, this branch should never be reached. error stop else allocate (str, source = table%strings(index)%str) end if end function string_table_t_look_up_string subroutine ast_t_ensure_storage (ast, length_needed) class(ast_t), intent(inout) :: ast integer(kind = nk), intent(in) :: length_needed integer(kind = nk) :: len_needed integer(kind = nk) :: new_size type(ast_t) :: new_ast len_needed = max (length_needed, 1_nk) if (.not. allocated (ast%nodes)) then ! Initialize a new ast%nodes array. new_size = new_storage_size (len_needed) allocate (ast%nodes(1:new_size)) else if (ubound (ast%nodes, 1) < len_needed) then ! Allocate a new ast%nodes array, larger than the current one, ! but containing the same nodes. 
new_size = new_storage_size (len_needed) allocate (new_ast%nodes(1:new_size)) new_ast%nodes(1:ast%len) = ast%nodes(1:ast%len) call move_alloc (new_ast%nodes, ast%nodes) end if end subroutine ast_t_ensure_storage subroutine read_ast (unit_no, strbuf, ast, symtab, strtab) integer, intent(in) :: unit_no type(strbuf_t), intent(inout) :: strbuf type(ast_t), intent(inout) :: ast type(string_table_t), intent(inout) :: symtab type(string_table_t), intent(inout) :: strtab logical :: eof logical :: no_newline integer(kind = nk) :: after_ast_address ast%len = 0 symtab%len = 0 strtab%len = 0 call build_subtree (1_nk, after_ast_address) contains recursive subroutine build_subtree (here_address, after_subtree_address) integer(kind = nk), value :: here_address integer(kind = nk), intent(out) :: after_subtree_address integer :: node_variety integer(kind = nk) :: i, j integer(kind = nk) :: left_branch_address integer(kind = nk) :: right_branch_address ! Get a line from the parser output. call get_line_from_stream (unit_no, eof, no_newline, strbuf) if (eof) then call ast_error else ! Prepare to store a new node. call ast%ensure_storage(here_address) ast%len = here_address ! What sort of node is it? 
i = skip_whitespace (strbuf, 1_nk) j = skip_non_whitespace (strbuf, i) node_variety = strbuf_to_node_variety (strbuf, i, j - 1) ast%nodes(here_address)%node_variety = node_variety select case (node_variety) case (node_Nil) after_subtree_address = here_address + 1 case (node_Identifier) i = skip_whitespace (strbuf, j) j = skip_non_whitespace (strbuf, i) ast%nodes(here_address)%int = & & strbuf_to_symbol_index (strbuf, i, j - 1, symtab) after_subtree_address = here_address + 1 case (node_String) i = skip_whitespace (strbuf, j) j = skip_whitespace_backwards (strbuf, strbuf%length()) ast%nodes(here_address)%int = & & strbuf_to_string_index (strbuf, i, j, strtab) after_subtree_address = here_address + 1 case (node_Integer) i = skip_whitespace (strbuf, j) j = skip_non_whitespace (strbuf, i) ast%nodes(here_address)%int = strbuf_to_int (strbuf, i, j - 1) after_subtree_address = here_address + 1 case default ! The node is internal, and has left and right branches. ! The left branch will start at left_branch_address; the ! right branch will start at left_branch_address + ! right_side_offset. left_branch_address = here_address + 1 ! Build the left branch. call build_subtree (left_branch_address, right_branch_address) ! Build the right_branch. call build_subtree (right_branch_address, after_subtree_address) ast%nodes(here_address)%right_branch_offset = & & right_branch_address - left_branch_address end select end if end subroutine build_subtree end subroutine read_ast function strbuf_to_node_variety (strbuf, i, j) result (node_variety) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i, j integer :: node_variety ! ! This function has not been optimized in any way, unless the ! Fortran compiler can optimize it. ! ! Something like a ‘radix tree search’ could be done on the ! characters of the strbuf. Or a perfect hash function. Or a ! binary search. Etc. ! 
if (j == i - 1) then call ast_error else select case (strbuf%to_unicode(i, j)) case (ck_";") node_variety = node_Nil case (ck_"Identifier") node_variety = node_Identifier case (ck_"String") node_variety = node_String case (ck_"Integer") node_variety = node_Integer case (ck_"Sequence") node_variety = node_Sequence case (ck_"If") node_variety = node_If case (ck_"Prtc") node_variety = node_Prtc case (ck_"Prts") node_variety = node_Prts case (ck_"Prti") node_variety = node_Prti case (ck_"While") node_variety = node_While case (ck_"Assign") node_variety = node_Assign case (ck_"Negate") node_variety = node_Negate case (ck_"Not") node_variety = node_Not case (ck_"Multiply") node_variety = node_Multiply case (ck_"Divide") node_variety = node_Divide case (ck_"Mod") node_variety = node_Mod case (ck_"Add") node_variety = node_Add case (ck_"Subtract") node_variety = node_Subtract case (ck_"Less") node_variety = node_Less case (ck_"LessEqual") node_variety = node_LessEqual case (ck_"Greater") node_variety = node_Greater case (ck_"GreaterEqual") node_variety = node_GreaterEqual case (ck_"Equal") node_variety = node_Equal case (ck_"NotEqual") node_variety = node_NotEqual case (ck_"And") node_variety = node_And case (ck_"Or") node_variety = node_Or case default call ast_error end select end if end function strbuf_to_node_variety function strbuf_to_symbol_index (strbuf, i, j, symtab) result (int) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i, j type(string_table_t), intent(inout) :: symtab integer(kind = rik) :: int if (j == i - 1) then call ast_error else int = symtab%look_up(strbuf%to_unicode (i, j)) end if end function strbuf_to_symbol_index function strbuf_to_int (strbuf, i, j) result (int) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i, j integer(kind = rik) :: int integer :: stat character(:, kind = ck), allocatable :: str if (j < i) then call ast_error else allocate (character(len = (j - i) + 1_nk, kind = ck) :: str) str 
= strbuf%to_unicode (i, j)
       ! List-directed read of the integer from the internal file.
       read (str, *, iostat = stat) int
       if (stat /= 0) then
          call ast_error
       end if
    end if
  end function strbuf_to_int

  function strbuf_to_string_index (strbuf, i, j, strtab) result (int)
    !
    ! Convert the quoted string literal at positions i through j of
    ! strbuf to its index in strtab, adding it to strtab if it is
    ! new. An empty span (j == i - 1) is reported as an AST error.
    !
    class(strbuf_t), intent(in) :: strbuf
    integer(kind = nk), intent(in) :: i, j
    type(string_table_t), intent(inout) :: strtab
    integer(kind = rik) :: int

    if (j == i - 1) then
       call ast_error
    else
       int = strtab%look_up(strbuf_to_string (strbuf, i, j))
    end if
  end function strbuf_to_string_index

  function strbuf_to_string (strbuf, i, j) result (str)
    !
    ! Decode the string literal occupying positions i through j of
    ! strbuf. Positions i and j must both hold a double quote. The
    ! escapes \n and \\ are translated to a newline and a backslash;
    ! any other escape, or a malformed literal, is reported as an
    ! AST error.
    !
    class(strbuf_t), intent(in) :: strbuf
    integer(kind = nk), intent(in) :: i, j
    character(:, kind = ck), allocatable :: str

    character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
    character(1, kind = ck), parameter :: backslash_char = char (92, kind = ck)

    ! The following is correct for Unix and its relatives.
    character(1, kind = ck), parameter :: newline_char = linefeed_char

    integer(kind = nk) :: k
    integer(kind = nk) :: count

    ! The opening and closing quotes must be two distinct characters;
    ! a lone quote (j <= i) is not a string literal.
    if (j <= i .or. strbuf%chars(i) /= ck_'"' .or. strbuf%chars(j) /= ck_'"') then
       call ast_error
    else
       ! Count how many characters are needed.
       count = 0
       k = i + 1
       do while (k < j)
          count = count + 1
          if (strbuf%chars(k) == backslash_char) then
             k = k + 2
          else
             k = k + 1
          end if
       end do

       allocate (character(len = count, kind = ck) :: str)

       ! Second pass: copy the characters, translating escapes.
       count = 0
       k = i + 1
       do while (k < j)
          if (strbuf%chars(k) == backslash_char) then
             if (k == j - 1) then
                ! A backslash directly before the closing quote.
                call ast_error
             else
                select case (strbuf%chars(k + 1))
                case (ck_'n')
                   count = count + 1
                   str(count:count) = newline_char
                case (backslash_char)
                   count = count + 1
                   str(count:count) = backslash_char
                case default
                   call ast_error
                end select
                k = k + 2
             end if
          else
             count = count + 1
             str(count:count) = strbuf%chars(k)
             k = k + 1
          end if
       end do
    end if
  end function strbuf_to_string

  subroutine ast_error
    !
    ! It might be desirable to give more detail.
    !
    write (error_unit, '("The AST input seems corrupted.")')
    stop 1
  end subroutine ast_error

end module ast_reader

module code_generation
  !
  !
! First we generate code as if the virtual machine itself were part
  ! of this program. Then we disassemble the generated code.
  !
  ! Because we are targeting only the one output language, this seems
  ! an easy way to perform the task.
  !
  !
  ! A point worth noting: the virtual machine is a stack
  ! architecture.
  !
  ! Stack architectures have a long history. Burroughs famously
  ! preferred stack architectures for running Algol programs. See, for
  ! instance,
  ! https://en.wikipedia.org/w/index.php?title=Burroughs_large_systems&oldid=1068076420
  !

  use, intrinsic :: iso_fortran_env, only: input_unit
  use, intrinsic :: iso_fortran_env, only: output_unit
  use, intrinsic :: iso_fortran_env, only: error_unit
  use, non_intrinsic :: compiler_type_kinds
  use, non_intrinsic :: helper_procedures
  use, non_intrinsic :: ast_reader

  implicit none
  private

  public :: generate_and_output_code
  public :: generate_code
  public :: output_code

  ! The virtual machine cannot handle integers of more than 32 bits,
  ! two’s-complement. The limits are therefore -(2**31) and
  ! (2**31) - 1; note the exponentiation operator `**' in both.
  integer(kind = rik), parameter :: vm_huge_negint = -(2_rik ** 31_rik)
  integer(kind = rik), parameter :: vm_huge_posint = (2_rik ** 31_rik) - 1_rik

  ! Arbitrarily chosen opcodes.
  integer, parameter :: opcode_nop = 0   ! I think there should be a nop
                                         ! opcode, to reserve space for
                                         ! later hand-patching. :)
  integer, parameter :: opcode_halt = 1  ! Does the ‘halt’ instruction
                                         ! apply brakes to the drum?
integer, parameter :: opcode_add = 2
  integer, parameter :: opcode_sub = 3
  integer, parameter :: opcode_mul = 4
  integer, parameter :: opcode_div = 5
  integer, parameter :: opcode_mod = 6
  integer, parameter :: opcode_lt = 7
  integer, parameter :: opcode_gt = 8
  integer, parameter :: opcode_le = 9
  integer, parameter :: opcode_ge = 10
  integer, parameter :: opcode_eq = 11
  integer, parameter :: opcode_ne = 12
  integer, parameter :: opcode_and = 13
  integer, parameter :: opcode_or = 14
  integer, parameter :: opcode_neg = 15
  integer, parameter :: opcode_not = 16
  integer, parameter :: opcode_prtc = 17
  integer, parameter :: opcode_prti = 18
  integer, parameter :: opcode_prts = 19
  integer, parameter :: opcode_fetch = 20
  integer, parameter :: opcode_store = 21
  integer, parameter :: opcode_push = 22
  integer, parameter :: opcode_jmp = 23
  integer, parameter :: opcode_jz = 24

  ! Mnemonics, indexed by opcode. Every literal is blank-padded to
  ! exactly 8 characters: all the elements of an array constructor
  ! without a type-spec must have the same length.
  character(8, kind = ck), parameter :: opcode_names(0:24) = &
       & (/ "nop     ",   &
       &    "halt    ",   &
       &    "add     ",   &
       &    "sub     ",   &
       &    "mul     ",   &
       &    "div     ",   &
       &    "mod     ",   &
       &    "lt      ",   &
       &    "gt      ",   &
       &    "le      ",   &
       &    "ge      ",   &
       &    "eq      ",   &
       &    "ne      ",   &
       &    "and     ",   &
       &    "or      ",   &
       &    "neg     ",   &
       &    "not     ",   &
       &    "prtc    ",   &
       &    "prti    ",   &
       &    "prts    ",   &
       &    "fetch   ",   &
       &    "store   ",   &
       &    "push    ",   &
       &    "jmp     ",   &
       &    "jz      " /)

  ! The generated virtual machine code: a growable array of bytes,
  ! indexed starting at zero, of which the first `len' are in use.
  type :: vm_code_t
     integer(kind = rik), private :: len = 0_rik
     character(1), allocatable :: bytes(:)
   contains
     procedure, pass, private :: ensure_storage => vm_code_t_ensure_storage
     procedure, pass :: length => vm_code_t_length
  end type vm_code_t

contains

  subroutine vm_code_t_ensure_storage (code, length_needed)
    !
    ! Make sure code%bytes is allocated and can hold at least
    ! length_needed bytes (at least one), at indices starting from
    ! zero.
    !
    class(vm_code_t), intent(inout) :: code
    integer(kind = nk), intent(in) :: length_needed

    integer(kind = nk) :: len_needed
    integer(kind = nk) :: new_size
    type(vm_code_t) :: new_code

    len_needed = max (length_needed, 1_nk)

    if (.not. allocated (code%bytes)) then
       ! Initialize a new code%bytes array.
       new_size = new_storage_size (len_needed)
       allocate (code%bytes(0:(new_size - 1)))
    else if (ubound (code%bytes, 1) < len_needed - 1) then
       !
Allocate a new code%bytes array, larger than the current one, ! but containing the same bytes. new_size = new_storage_size (len_needed) allocate (new_code%bytes(0:(new_size - 1))) new_code%bytes(0:(code%len - 1)) = code%bytes(0:(code%len - 1)) call move_alloc (new_code%bytes, code%bytes) end if end subroutine vm_code_t_ensure_storage elemental function vm_code_t_length (code) result (len) class(vm_code_t), intent(in) :: code integer(kind = rik) :: len len = code%len end function vm_code_t_length subroutine generate_and_output_code (outp, ast, symtab, strtab) integer, intent(in) :: outp ! The unit to write the output to. type(ast_t), intent(in) :: ast type(string_table_t), intent(inout) :: symtab type(string_table_t), intent(inout) :: strtab type(vm_code_t) :: code integer(kind = rik) :: i_vm code%len = 0 i_vm = 0_rik call generate_code (ast, 1_nk, i_vm, code) call output_code (outp, symtab, strtab, code) end subroutine generate_and_output_code subroutine generate_code (ast, i_ast, i_vm, code) type(ast_t), intent(in) :: ast integer(kind = nk), intent(in) :: i_ast ! Index in the ast array. integer(kind = rik), intent(inout) :: i_vm ! Address in the virtual machine. type(vm_code_t), intent(inout) :: code call traverse (i_ast) ! Generate a halt instruction. call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_halt) i_vm = i_vm + 1 code%len = i_vm contains recursive subroutine traverse (i_ast) integer(kind = nk), intent(in) :: i_ast ! Index in the ast array. select case (ast%nodes(i_ast)%node_variety) case (node_Nil) continue case (node_Integer) block integer(kind = rik) :: int_value int_value = ast%nodes(i_ast)%int call ensure_integer_is_vm_compatible (int_value) call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_push) call int32_to_vm_bytes (int_value, code%bytes, i_vm + 1) i_vm = i_vm + 5 end block case (node_Identifier) block integer(kind = rik) :: variable_index ! In the best Fortran tradition, we indexed the variables ! 
starting at one; however, the virtual machine starts them ! at zero. So subtract 1. variable_index = ast%nodes(i_ast)%int - 1 call ensure_integer_is_vm_compatible (variable_index) call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_fetch) call uint32_to_vm_bytes (variable_index, code%bytes, i_vm + 1) i_vm = i_vm + 5 end block case (node_String) block integer(kind = rik) :: string_index ! In the best Fortran tradition, we indexed the strings ! starting at one; however, the virtual machine starts them ! at zero. So subtract 1. string_index = ast%nodes(i_ast)%int - 1 call ensure_integer_is_vm_compatible (string_index) call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_push) call uint32_to_vm_bytes (string_index, code%bytes, i_vm + 1) i_vm = i_vm + 5 end block case (node_Assign) block integer(kind = nk) :: i_left, i_right integer(kind = rik) :: variable_index i_left = left_branch (i_ast) i_right = right_branch (i_ast) ! In the best Fortran tradition, we indexed the variables ! starting at one; however, the virtual machine starts them ! at zero. So subtract 1. variable_index = ast%nodes(i_left)%int - 1 ! Create code to push the right side onto the stack call traverse (i_right) ! Create code to store that result into the variable on the ! left side. 
call ensure_node_variety (node_Identifier, ast%nodes(i_left)%node_variety) call ensure_integer_is_vm_compatible (variable_index) call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_store) call uint32_to_vm_bytes (variable_index, code%bytes, i_vm + 1) i_vm = i_vm + 5 end block case (node_Multiply) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_mul) i_vm = i_vm + 1 case (node_Divide) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_div) i_vm = i_vm + 1 case (node_Mod) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_mod) i_vm = i_vm + 1 case (node_Add) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_add) i_vm = i_vm + 1 case (node_Subtract) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_sub) i_vm = i_vm + 1 case (node_Less) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_lt) i_vm = i_vm + 1 case (node_LessEqual) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_le) i_vm = i_vm + 1 case (node_Greater) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_gt) i_vm = i_vm + 1 case (node_GreaterEqual) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_ge) i_vm = i_vm + 1 case (node_Equal) call traverse (left_branch (i_ast)) call traverse 
(right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_eq) i_vm = i_vm + 1 case (node_NotEqual) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_ne) i_vm = i_vm + 1 case (node_Negate) call ensure_node_variety (node_Nil, & & ast%nodes(right_branch (i_ast))%node_variety) call traverse (left_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_neg) i_vm = i_vm + 1 case (node_Not) call ensure_node_variety (node_Nil, & & ast%nodes(right_branch (i_ast))%node_variety) call traverse (left_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_not) i_vm = i_vm + 1 case (node_And) ! ! This is not a short-circuiting AND and so differs from ! C. One would not notice the difference, except in side ! effects that (I believe) are not possible in our tiny ! language. ! ! Even in a language such as Fortran that has actual AND and ! OR operators, an optimizer may generate short-circuiting ! code and so spoil one’s expectations for side ! effects. (Therefore gfortran may issue a warning if you ! call an unpure function within an .AND. or ! .OR. expression.) ! ! A C equivalent to what we have our code generator doing ! (and to Fortran’s .AND. operator) might be something like ! ! #define AND(a, b) ((!!(a)) * (!!(b))) ! ! This macro takes advantage of the equivalence of AND to ! multiplication modulo 2. The ‘!!’ notations are a C idiom ! for converting values to 0 and 1. ! call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_and) i_vm = i_vm + 1 case (node_Or) ! ! This is not a short-circuiting OR and so differs from ! C. One would not notice the difference, except in side ! effects that (I believe) are not possible in our tiny ! language. ! ! Even in a language such as Fortran that has actual AND and ! 
! OR operators, an optimizer may generate short-circuiting
         ! code and so spoil one’s expectations for side
         ! effects. (Therefore gfortran may issue a warning if you
         ! call an unpure function within an .AND. or
         ! .OR. expression.)
         !
         ! A C equivalent to what we have our code generator doing
         ! (and to Fortran’s .OR. operator) might be something like
         !
         !    #define OR(a, b) (!( (!(a)) * (!(b)) ))
         !
         ! This macro takes advantage of the equivalence of AND to
         ! multiplication modulo 2, and the equivalence of OR(a,b) to
         ! !AND(!a,!b). One could instead add the two normalized
         ! values and test whether the sum is non-zero. (Addition
         ! modulo 2 would NOT do: that is exclusive-or, which gives 0
         ! when both operands are true.)
         !
         !    #define OR(a, b) ( ( (!!(a)) + (!!(b)) ) != 0 )
         !
         call traverse (left_branch (i_ast))
         call traverse (right_branch (i_ast))
         call code%ensure_storage(i_vm + 1)
         code%bytes(i_vm) = achar (opcode_or)
         i_vm = i_vm + 1

      case (node_If)
         block
           !
           ! The right branch of an If node must itself be an If
           ! node; its own left branch is the predicate-true code
           ! and its right branch is the predicate-false (else)
           ! code, which is node_Nil when there is no else clause.
           !
           integer(kind = nk) :: i_left, i_right
           integer(kind = nk) :: i_right_then_left, i_right_then_right
           logical :: there_is_an_else_clause
           integer(kind = rik) :: fixup_address1
           integer(kind = rik) :: fixup_address2
           integer(kind = rik) :: relative_address

           i_left = left_branch (i_ast)
           i_right = right_branch (i_ast)

           call ensure_node_variety (node_If, ast%nodes(i_right)%node_variety)

           i_right_then_left = left_branch (i_right)
           i_right_then_right = right_branch (i_right)

           there_is_an_else_clause = &
                & (ast%nodes(i_right_then_right)%node_variety /= node_Nil)

           ! Generate code for the predicate.
           call traverse (i_left)

           ! Generate a conditional jump over the predicate-true code.
           ! The operand is written as zero for now; its address is
           ! remembered in fixup_address1 so it can be patched later.
           call code%ensure_storage(i_vm + 5)
           code%bytes(i_vm) = achar (opcode_jz)
           call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1)
           fixup_address1 = i_vm + 1
           i_vm = i_vm + 5

           ! Generate the predicate-true code.
           call traverse (i_right_then_left)

           if (there_is_an_else_clause) then
              ! Generate an unconditional jump over the predicate-false
              ! code.
call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_jmp) call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1) fixup_address2 = i_vm + 1 i_vm = i_vm + 5 ! Fix up the conditional jump, so it jumps to the ! predicate-false code. relative_address = i_vm - fixup_address1 call int32_to_vm_bytes (relative_address, code%bytes, fixup_address1) ! Generate the predicate-false code. call traverse (i_right_then_right) ! Fix up the unconditional jump, so it jumps past the ! predicate-false code. relative_address = i_vm - fixup_address2 call int32_to_vm_bytes (relative_address, code%bytes, fixup_address2) else ! Fix up the conditional jump, so it jumps past the ! predicate-true code. relative_address = i_vm - fixup_address1 call int32_to_vm_bytes (relative_address, code%bytes, fixup_address1) end if end block case (node_While) block ! ! Note there is another common way to translate a ! while-loop which is to put (logically inverted) predicate ! code after the loop-body code, followed by a ! conditional jump to the start of the loop. You start the ! loop by unconditionally jumping to the predicate code. ! ! If our VM had a ‘jnz’ instruction, that translation would ! almost certainly be slightly better than this one. Given ! that we do not have a ‘jnz’, the code would end up ! slightly enlarged; one would have to put ‘not’ before the ! ‘jz’ at the bottom of the loop. ! integer(kind = nk) :: i_left, i_right integer(kind = rik) :: loop_address integer(kind = rik) :: fixup_address integer(kind = rik) :: relative_address i_left = left_branch (i_ast) i_right = right_branch (i_ast) ! Generate code for the predicate. loop_address = i_vm call traverse (i_left) ! Generate a conditional jump out of the loop. call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_jz) call int32_to_vm_bytes (0_rik, code%bytes, i_vm + 1) fixup_address = i_vm + 1 i_vm = i_vm + 5 ! Generate code for the loop body. call traverse (i_right) ! 
Generate an unconditional jump to the top of the loop. call code%ensure_storage(i_vm + 5) code%bytes(i_vm) = achar (opcode_jmp) relative_address = loop_address - (i_vm + 1) call int32_to_vm_bytes (relative_address, code%bytes, i_vm + 1) i_vm = i_vm + 5 ! Fix up the conditional jump, so it jumps after the loop ! body. relative_address = i_vm - fixup_address call int32_to_vm_bytes (relative_address, code%bytes, fixup_address) end block case (node_Prtc) call ensure_node_variety (node_Nil, & & ast%nodes(right_branch (i_ast))%node_variety) call traverse (left_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_prtc) i_vm = i_vm + 1 case (node_Prti) call ensure_node_variety (node_Nil, & & ast%nodes(right_branch (i_ast))%node_variety) call traverse (left_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_prti) i_vm = i_vm + 1 case (node_Prts) call ensure_node_variety (node_Nil, & & ast%nodes(right_branch (i_ast))%node_variety) call traverse (left_branch (i_ast)) call code%ensure_storage(i_vm + 1) code%bytes(i_vm) = achar (opcode_prts) i_vm = i_vm + 1 case (node_Sequence) call traverse (left_branch (i_ast)) call traverse (right_branch (i_ast)) case default call bad_ast end select code%len = i_vm end subroutine traverse elemental function left_branch (i_here) result (i_left) integer(kind = nk), intent(in) :: i_here integer(kind = nk) :: i_left i_left = i_here + 1 end function left_branch elemental function right_branch (i_here) result (i_right) integer(kind = nk), intent(in) :: i_here integer(kind = nk) :: i_right i_right = i_here + 1 + ast%nodes(i_here)%right_branch_offset end function right_branch subroutine ensure_node_variety (expected_node_variety, found_node_variety) integer, intent(in) :: expected_node_variety integer, intent(in) :: found_node_variety if (expected_node_variety /= found_node_variety) call bad_ast end subroutine ensure_node_variety subroutine bad_ast call codegen_error_message write 
(error_unit, '("unexpected abstract syntax")')
      stop 1
    end subroutine bad_ast

  end subroutine generate_code

  !
  ! Write the whole listing to unit `outp': the header line, the
  ! string literals, and then the disassembled instructions.
  !
  subroutine output_code (outp, symtab, strtab, code)
    integer, intent(in) :: outp        ! The unit to write the output to.
    type(string_table_t), intent(inout) :: symtab
    type(string_table_t), intent(inout) :: strtab
    type(vm_code_t), intent(in) :: code

    call write_header (outp, symtab%length(), strtab%length())
    call write_strings (outp, strtab)
    call disassemble_instructions (outp, code)
  end subroutine output_code

  !
  ! Write the "Datasize: N Strings: M" header line. Both counts are
  ! first checked against the range the virtual machine accepts.
  !
  subroutine write_header (outp, data_size, strings_size)
    integer, intent(in) :: outp
    ! The counts are only read here, so declare them intent(in), as
    ! every sibling routine does for its inputs.
    integer(kind = rik), intent(in) :: data_size
    integer(kind = rik), intent(in) :: strings_size

    call ensure_integer_is_vm_compatible (data_size)
    call ensure_integer_is_vm_compatible (strings_size)
    write (outp, '("Datasize: ", I0, " Strings: ", I0)') data_size, strings_size
  end subroutine write_header

  !
  ! Write each entry of the string table, in table order, one quoted
  ! string per line.
  !
  subroutine write_strings (outp, strtab)
    integer, intent(in) :: outp
    type(string_table_t), intent(inout) :: strtab

    integer(kind = rik) :: i

    do i = 1_rik, strtab%length()
       write (outp, '(1A)') quoted_string (strtab%look_up(i))
    end do
  end subroutine write_strings

  !
  ! Print the generated code as an assembly listing, one instruction
  ! per line: address, opcode name, and (for the 5-byte instructions)
  ! the decoded operand.
  !
  subroutine disassemble_instructions (outp, code)
    integer, intent(in) :: outp
    type(vm_code_t), intent(in) :: code

    integer(kind = rik) :: i_vm
    integer :: opcode
    integer(kind = rik) :: n

    i_vm = 0_rik
    ! Use `<' rather than `/=': if the last instruction were truncated,
    ! a 5-byte step would jump past the end and `/=' would never again
    ! be false.
    do while (i_vm < code%length())
       call write_vm_code_address (outp, i_vm)
       opcode = iachar (code%bytes(i_vm))
       call write_vm_opcode (outp, opcode)
       select case (opcode)
       case (opcode_push)
          ! Signed 32-bit immediate.
          call int32_from_vm_bytes (n, code%bytes, i_vm + 1)
          call write_vm_int_literal (outp, n)
          i_vm = i_vm + 5
       case (opcode_fetch, opcode_store)
          ! Unsigned 32-bit data address.
          call uint32_from_vm_bytes (n, code%bytes, i_vm + 1)
          call write_vm_data_address (outp, n)
          i_vm = i_vm + 5
       case (opcode_jmp, opcode_jz)
          ! Signed offset, relative to the address of the operand
          ! itself (i_vm + 1).
          call int32_from_vm_bytes (n, code%bytes, i_vm + 1)
          call write_vm_jump_address (outp, n, i_vm + 1)
          i_vm = i_vm + 5
       case default
          ! One-byte instruction with no operand.
          i_vm = i_vm + 1
       end select
       write (outp, '()', advance = 'yes')
    end do
  end subroutine disassemble_instructions

  !
  ! Write an instruction address, right-justified in ten columns and
  ! followed by one blank, without ending the line.
  !
  subroutine write_vm_code_address (outp, i_vm)
    integer, intent(in) :: outp
    integer(kind = rik), intent(in) :: i_vm

    ! 10 characters is wide enough for any 32-bit unsigned number.
    write (outp, '(I10, 1X)', advance = 'no') i_vm
  end subroutine write_vm_code_address

  !
  ! Write the mnemonic for `opcode' without ending the line. Opcodes
  ! that take an operand are written as a fixed six-character field so
  ! the operand column lines up; the others are written trimmed.
  !
  subroutine write_vm_opcode (outp, opcode)
    integer, intent(in) :: outp
    integer, intent(in) :: opcode

    character(8, kind = ck) :: opcode_name

    opcode_name = opcode_names(opcode)

    select case (opcode)
    case (opcode_push, opcode_fetch, opcode_store, opcode_jz, opcode_jmp)
       write (outp, '(1A)', advance = 'no') opcode_name(1:6)
    case default
       write (outp, '(1A)', advance = 'no') trim (opcode_name)
    end select
  end subroutine write_vm_opcode

  !
  ! Write an integer operand in minimal width, without ending the line.
  !
  subroutine write_vm_int_literal (outp, n)
    integer, intent(in) :: outp
    integer(kind = rik), intent(in) :: n

    write (outp, '(I0)', advance = 'no') n
  end subroutine write_vm_int_literal

  !
  ! Write a data address operand as "[i]", without ending the line.
  !
  subroutine write_vm_data_address (outp, i)
    integer, intent(in) :: outp
    integer(kind = rik), intent(in) :: i

    write (outp, '("[", I0, "]")', advance = 'no') i
  end subroutine write_vm_data_address

  !
  ! Write a jump operand as " (relative) absolute", where the absolute
  ! target is `i_vm + relative_address'. The caller passes the address
  ! of the operand as `i_vm'.
  !
  subroutine write_vm_jump_address (outp, relative_address, i_vm)
    integer, intent(in) :: outp
    integer(kind = rik), intent(in) :: relative_address
    integer(kind = rik), intent(in) :: i_vm

    write (outp, '(" (", I0, ") ", I0)', advance = 'no') &
         &    relative_address, i_vm + relative_address
  end subroutine write_vm_jump_address

  subroutine ensure_integer_is_vm_compatible (n)
    integer(kind = rik), intent(in) :: n
    !
    ! It would seem desirable to check this in the syntax analyzer,
    ! instead, so line and column numbers can be given. But checking
    ! here will not hurt.
    !
    if (n < vm_huge_negint .or. &
vm_huge_posint < n) then
       call codegen_error_message
       write (error_unit, '("integer is too large for the virtual machine: ", I0)') n
       stop 1
    end if
  end subroutine ensure_integer_is_vm_compatible

  !
  ! Write the common prefix of every code-generator diagnostic to
  ! error_unit, leaving the line open for the specific message.
  !
  subroutine codegen_error_message
    write (error_unit, '("Code generation error: ")', advance = 'no')
  end subroutine codegen_error_message

end module code_generation

!
! Main program. Usage: gen [INPUT_FILE [OUTPUT_FILE]]
!
! With no arguments the AST is read from input_unit and the listing is
! written to output_unit. One argument names the input file; two name
! the input and output files. Three or more arguments print the usage
! message instead.
!
program gen
  use, intrinsic :: iso_fortran_env, only: input_unit
  use, intrinsic :: iso_fortran_env, only: output_unit
  use, intrinsic :: iso_fortran_env, only: error_unit
  use, non_intrinsic :: compiler_type_kinds
  use, non_intrinsic :: string_buffers
  use, non_intrinsic :: ast_reader
  use, non_intrinsic :: code_generation

  implicit none

  integer, parameter :: inp_unit_no = 100
  integer, parameter :: outp_unit_no = 101

  integer :: arg_count
  character(200) :: arg         ! File names longer than this are truncated.
  integer :: inp
  integer :: outp

  type(strbuf_t) :: strbuf
  type(ast_t) :: ast
  type(string_table_t) :: symtab
  type(string_table_t) :: strtab

  arg_count = command_argument_count ()
  if (3 <= arg_count) then
     call print_usage
  else
     if (arg_count == 0) then
        inp = input_unit
        outp = output_unit
     else if (arg_count == 1) then
        call get_command_argument (1, arg)
        inp = open_for_input (trim (arg))
        outp = output_unit
     else if (arg_count == 2) then
        call get_command_argument (1, arg)
        inp = open_for_input (trim (arg))
        call get_command_argument (2, arg)
        outp = open_for_output (trim (arg))
     end if

     call read_ast (inp, strbuf, ast, symtab, strtab)
     call generate_and_output_code (outp, ast, symtab, strtab)
  end if

contains

  !
  ! Open an existing file for unformatted stream reading on unit
  ! inp_unit_no and return that unit number. On failure, report the
  ! error on error_unit and stop with status 1.
  !
  function open_for_input (filename) result (unit_no)
    ! Assumed length: the dummy takes the length of the actual argument.
    character(*), intent(in) :: filename
    integer :: unit_no

    integer :: stat

    open (unit = inp_unit_no, file = filename, status = 'old', &
         & action = 'read', access = 'stream', form = 'unformatted',  &
         & iostat = stat)
    if (stat /= 0) then
       write (error_unit, '("Error: failed to open ", 1A, " for input")') filename
       stop 1
    end if
    unit_no = inp_unit_no
  end function open_for_input

  !
  ! Open a file for writing on unit outp_unit_no and return that unit
  ! number. On failure, report the error on error_unit and stop with
  ! status 1.
  !
  function open_for_output (filename) result (unit_no)
    ! Assumed length: the dummy takes the length of the actual argument.
    character(*), intent(in) :: filename
    integer :: unit_no

    integer :: stat

    open (unit = outp_unit_no, file = filename, action = 'write', iostat = stat)
    if (stat /= 0) then
       write (error_unit, '("Error: failed to open ", 1A, " for output")') filename
       stop 1
    end if
    unit_no = outp_unit_no
  end function open_for_output

  !
  ! Print the usage line, using command argument 0 as the program name.
  !
  subroutine print_usage
    character(200) :: progname

    call get_command_argument (0, progname)
    write (output_unit, '("Usage: ", 1A, " [INPUT_FILE [OUTPUT_FILE]]")') &
         &      trim (progname)
  end subroutine print_usage

end program gen</syntaxhighlight> {{out}} $ ./lex compiler-tests/count.t \| ./parse \| ./gen <pre>Datasize: 1 Strings: 2 "count is: " "\n" 0 push 1 5 store [0] 10 fetch [0] 15 push 10 20 lt 21 jz (43) 65 26 push 0 31 prts 32 fetch [0] 37 prti 38 push 1 43 prts 44 fetch [0] 49 push 1 54 add 55 store [0] 60 jmp (-51) 10 65 halt</pre> =={{header\|Go}}== {{trans\|C}} <~~lang~~syntaxhighlight lang="go">package main import ( Line 2,911 ⟶ 5,795: codeFinish() listCode() }</~~lang~~syntaxhighlight> {{out}} Line 2,942 ⟶ 5,826: Implementation: <~~lang~~syntaxhighlight Jlang="j">require'format/printf' (opcodes)=: opcodes=: ;:{{)n Line 3,073 ⟶ 5,957: gen_code load_ast y list_code gen_op halt }}</~~lang~~syntaxhighlight> Count example: <syntaxhighlight lang="j"> ~~<lang J>~~ count=:{{)n count = 1; Line 3,107 ⟶ 5,991: 60 jmp (-51) 10 65 halt </syntaxhighlight> ~~</lang>~~ =={{header\|Java}}== {{trans\|Python}} <~~lang~~syntaxhighlight lang="java">package codegenerator; import java.io.File; Line 3,453 ⟶ 6,337: } } </syntaxhighlight> ~~</lang>~~ =={{header\|Julia}}== <~~lang~~syntaxhighlight lang="julia">import Base.show mutable struct Asm32 Line 3,618 ⟶ 6,502: compiletoasm(iob) </~~lang~~syntaxhighlight>{{output}}<pre> Datasize: 1 Strings: 2 "count is: " Line 3,643 ⟶ 6,527: =={{header\|M2000 Interpreter}}== <syntaxhighlight lang="m2000 interpreter"> ~~<lang M2000 Interpreter>~~ Module CodeGenerator (s$){ Function code$(op$) { Line 3,868 ⟶ 6,752: Integer 1 } 
</syntaxhighlight> ~~</lang>~~ {{out}} Line 3,897 ⟶ 6,781: =={{header\|Nim}}== <~~lang~~syntaxhighlight ~~Nim~~lang="nim">import os, re, streams, strformat, strutils, tables, std/decls type Line 4,207 ⟶ 7,091: if toClose: stream.close() codegen.run(ast)</~~lang~~syntaxhighlight> {{out}} Line 4,321 ⟶ 7,205: =={{header\|Perl}}== Tested with perl v5.26.1 <~~lang~~syntaxhighlight ~~Perl~~lang="perl">#!/usr/bin/perl use strict; # gen.pl - flatAST to stack machine code Line 4,360 ⟶ 7,244: print "Datasize: $namecount Strings: $stringcount\n"; print "$_\n" for sort { $strings{$a} <=> $strings{$b} } keys %strings; print;</~~lang~~syntaxhighlight> Passes all tests. Line 4,366 ⟶ 7,250: Reusing parse.e from the [[Compiler/syntax_analyzer#Phix\|Syntax Analyzer task]]<br> Deviates somewhat from the task specification in that it generates executable machine code. <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(notonline)--> <span style="color: #000080;font-style:italic;">-- -- demo\rosetta\Compiler\cgen.e Line 4,760 ⟶ 7,644: <span style="color: #008080;">end</span> <span style="color: #008080;">while</span> <span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> <!--</~~lang~~syntaxhighlight>--> And a simple test driver for the specific task: <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(notonline)--> <span style="color: #000080;font-style:italic;">-- -- demo\rosetta\Compiler\cgen.exw Line 4,958 ⟶ 7,842: <span style="color: #000080;font-style:italic;">--main(command_line())</span> <span style="color: #000000;">main</span><span style="color: #0000FF;">({</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"gcd.c"</span><span style="color: #0000FF;">})</span> <!--</~~lang~~syntaxhighlight>--> {{out}} <pre> Line 4,996 ⟶ 7,880: =={{header\|Python}}== Tested with Python 2.7 and 3.x <~~lang~~syntaxhighlight 
~~Python~~lang="python">from __future__ import print_function import sys, struct, shlex, operator Line 5,249 ⟶ 8,133: code_gen(n) code_finish() list_code()</~~lang~~syntaxhighlight> {{out\|case=While counter example}} Line 5,280 ⟶ 8,164: Using 'while-count' example, input used is here: [https://github.com/SqrtNegInf/Rosettacode-Perl6-Smoke/blob/master/ref/ast.txt ast.txt] {{trans\|Perl}} <syntaxhighlight lang="raku" ~~perl6~~line>my %opnames = < Less lt LessEqual le Multiply mul Subtract sub NotEqual ne Divide div GreaterEqual ge Equal eq Greater gt Negate neg Line 5,316 ⟶ 8,200: say "Datasize: $name-count Strings: $string-count\n" ~ join('', %strings.keys.sort.reverse «~» "\n") ~ $code;</~~lang~~syntaxhighlight> {{out}} <pre>Datasize: 1 Strings: 2 Line 5,339 ⟶ 8,223: 60 jmp (-51) 10 65 halt</pre> =={{header\|RATFOR}}== {{works with\|ratfor77\|[https://sourceforge.net/p/chemoelectric/ratfor77/ public domain 1.0]}} {{works with\|gfortran\|11.3.0}} {{works with\|f2c\|20100827}} <syntaxhighlight lang="ratfor">###################################################################### # # The Rosetta Code code generator in Ratfor 77. # # # In FORTRAN 77 and therefore in Ratfor 77, there is no way to specify # that a value should be put on a call stack. Therefore there is no # way to implement recursive algorithms in Ratfor 77 (although see the # Ratfor for the "syntax analyzer" task, where a recursive language is # implemented in Ratfor). We are forced to use non-recursive # algorithms. # # How to deal with FORTRAN 77 input is another problem. I use # formatted input, treating each line as an array of type # CHARACTER--regrettably of no more than some predetermined, finite # length. It is a very simple method and presents no significant # difficulties, aside from the restriction on line length of the # input. 
# # # On a POSIX platform, the program can be compiled with f2c and run # somewhat as follows: # # ratfor77 gen-in-ratfor.r > gen-in-ratfor.f # f2c -C -Nc80 gen-in-ratfor.f # cc gen-in-ratfor.c -lf2c # ./a.out < compiler-tests/primes.ast # # With gfortran, a little differently: # # ratfor77 gen-in-ratfor.r > gen-in-ratfor.f # gfortran -fcheck=all -std=legacy gen-in-ratfor.f # ./a.out < compiler-tests/primes.ast # # # I/O is strictly from default input and to default output, which, on # POSIX systems, usually correspond respectively to standard input and # standard output. (I did not wish to have to deal with unit numbers; # these are now standardized in ISO_FORTRAN_ENV, but that is not # available in FORTRAN 77.) # #--------------------------------------------------------------------- # Some parameters you may wish to modify. define(LINESZ, 256) # Size of an input line. define(OUTLSZ, 1024) # Size of an output line. define(STRNSZ, 4096) # Size of the string pool. define(NODSSZ, 4096) # Size of the nodes pool. define(STCKSZ, 4096) # Size of stacks. define(MAXVAR, 256) # Maximum number of variables. define(MAXSTR, 256) # Maximum number of strings. define(CODESZ, 16384) # Maximum size of a compiled program. #--------------------------------------------------------------------- define(NEWLIN, 10) # The Unix newline character (ASCII LF). define(DQUOTE, 34) # The double quote character. define(BACKSL, 92) # The backslash character. #--------------------------------------------------------------------- define(NODESZ, 3) define(NNEXTF, 1) # Index for next-free. define(NTAG, 1) # Index for the tag. # For an internal node -- define(NLEFT, 2) # Index for the left node. define(NRIGHT, 3) # Index for the right node. # For a leaf node -- define(NITV, 2) # Index for the string pool index. define(NITN, 3) # Length of the value. define(NIL, -1) # Nil node. define(RGT, 10000) define(STAGE2, 20000) define(STAGE3, 30000) define(STAGE4, 40000) # The following all must be less than RGT. 
# Node tags.
define(NDID,   0)
define(NDSTR,  1)
define(NDINT,  2)
define(NDSEQ,  3)
define(NDIF,   4)
define(NDPRTC, 5)
define(NDPRTS, 6)
define(NDPRTI, 7)
define(NDWHIL, 8)
define(NDASGN, 9)
define(NDNEG,  10)
define(NDNOT,  11)
define(NDMUL,  12)
define(NDDIV,  13)
define(NDMOD,  14)
define(NDADD,  15)
define(NDSUB,  16)
define(NDLT,   17)
define(NDLE,   18)
define(NDGT,   19)
define(NDGE,   20)
define(NDEQ,   21)
define(NDNE,   22)
define(NDAND,  23)
define(NDOR,   24)

# Virtual machine opcodes.
define(OPHALT, 1)
define(OPADD,  2)
define(OPSUB,  3)
define(OPMUL,  4)
define(OPDIV,  5)
define(OPMOD,  6)
define(OPLT,   7)
define(OPGT,   8)
define(OPLE,   9)
define(OPGE,   10)
define(OPEQ,   11)
define(OPNE,   12)
define(OPAND,  13)
define(OPOR,   14)
define(OPNEG,  15)
define(OPNOT,  16)
define(OPPRTC, 17)
define(OPPRTI, 18)
define(OPPRTS, 19)
define(OPFTCH, 20)
define(OPSTOR, 21)
define(OPPUSH, 22)
define(OPJMP,  23)
define(OPJZ,   24)

#---------------------------------------------------------------------

function issp (c)

  # Is a character a space character? True for the blank (code 32)
  # and for codes 9 through 13.

  implicit none

  character c
  logical issp

  integer ic

  ic = ichar (c)
  issp = (ic == 32 || (9 <= ic && ic <= 13))
end

function skipsp (str, i, imax)

  # Skip past spaces in a string. Returns the first index at or after
  # i that holds a non-space, or stops once the index reaches imax.

  implicit none

  character str(*)
  integer i
  integer imax
  integer skipsp

  logical issp

  logical done

  skipsp = i
  done = .false.
  while (!done)
    {
      if (imax <= skipsp)
        done = .true.
      else if (!issp (str(skipsp)))
        done = .true.
      else
        skipsp = skipsp + 1
    }
end

function skipns (str, i, imax)

  # Skip past non-spaces in a string. Returns the first index at or
  # after i that holds a space, or stops once the index reaches imax.

  implicit none

  character str(*)
  integer i
  integer imax
  integer skipns

  logical issp

  logical done

  skipns = i
  done = .false.
  while (!done)
    {
      if (imax <= skipns)
        done = .true.
      else if (issp (str(skipns)))
        done = .true.
      else
        skipns = skipns + 1
    }
end

function trimrt (str, n)

  # Find the length of a string, if one ignores trailing spaces.
  # Returns zero if the first n characters are all spaces.

  implicit none

  character str(*)
  integer n
  integer trimrt

  logical issp

  logical done

  trimrt = n
  done = .false.
  while (!done)
    {
      if (trimrt == 0)
        done = .true.
      else if (!issp (str(trimrt)))
        done = .true.
      else
        trimrt = trimrt - 1
    }
end

#---------------------------------------------------------------------

subroutine addstr (strngs, istrng, src, i0, n0, i, n)

  # Add a string to the string pool. Copies src(i0 .. i0+n0-1) to the
  # pool's next free slot, returns its location in (i, n), and
  # advances istrng. An empty string is returned as (0, 0) and leaves
  # the pool untouched. Stops the program if the pool would overflow.

  implicit none

  character strngs(STRNSZ)      # String pool.
  integer istrng                # String pool's next slot.
  character src(*)              # Source string.
  integer i0, n0                # Index and length in source string.
  integer i, n                  # Index and length in string pool.

  integer j

  if (STRNSZ < istrng + (n0 - 1))
    {
      write (*, '(''string pool exhausted'')')
      stop
    }
  if (n0 == 0)
    {
      i = 0
      n = 0
    }
  else
    {
      for (j = 0; j < n0; j = j + 1)
        strngs(istrng + j) = src(i0 + j)
      i = istrng
      n = n0
      istrng = istrng + n0
    }
end

#---------------------------------------------------------------------

subroutine push (stack, sp, i)

  # Push a value. sp is the index of the next free slot. Stops the
  # program when sp has reached STCKSZ.

  implicit none

  integer stack(STCKSZ)
  integer sp                    # Stack pointer.
  integer i                     # Value to push.

  if (sp == STCKSZ)
    {
      write (*, '(''stack overflow in push'')')
      stop
    }
  stack(sp) = i
  sp = sp + 1
end

function pop (stack, sp)

  # Pop and return the top value. Stops the program if the stack is
  # empty (sp == 1).

  implicit none

  integer stack(STCKSZ)
  integer sp                    # Stack pointer.
  integer pop

  if (sp == 1)
    {
      write (*, '(''stack underflow in pop'')')
      stop
    }
  sp = sp - 1
  pop = stack(sp)
end

function nstack (sp)

  implicit none

  integer sp                    # Stack pointer.
  integer nstack

  nstack = sp - 1               # Current cardinality of the stack.
end

#---------------------------------------------------------------------

subroutine initnd (nodes, frelst)

  # Initialize the nodes pool. Every node is chained onto the free
  # list through its NNEXTF field; the last node ends the chain with
  # NIL.

  implicit none

  integer nodes (NODESZ, NODSSZ)
  integer frelst                # Head of the free list.

  integer i

  for (i = 1; i < NODSSZ; i = i + 1)
    nodes(NNEXTF, i) = i + 1
  nodes(NNEXTF, NODSSZ) = NIL
  frelst = 1
end

subroutine newnod (nodes, frelst, i)

  # Get the index for a new node taken from the free list.

  implicit none

  integer nodes (NODESZ, NODSSZ)
  integer frelst                # Head of the free list.
  integer i                     # Index of the new node.
integer j if (frelst == NIL) { write (, '(''nodes pool exhausted'')') stop } i = frelst frelst = nodes(NNEXTF, frelst) for (j = 1; j <= NODESZ; j = j + 1) nodes(j, i) = 0 end subroutine frenod (nodes, frelst, i) # Return a node to the free list. integer nodes (NODESZ, NODSSZ) integer frelst # Head of the free list. integer i # Index of the node to free. nodes(NNEXTF, i) = frelst frelst = i end function strtag (str, i, n) implicit none character str() integer i, n integer strtag character16 s integer j for (j = 0; j < 16; j = j + 1) if (j < n) s(j + 1 : j + 1) = str(i + j) else s(j + 1 : j + 1) = ' ' if (s == "Identifier ") strtag = NDID else if (s == "String ") strtag = NDSTR else if (s == "Integer ") strtag = NDINT else if (s == "Sequence ") strtag = NDSEQ else if (s == "If ") strtag = NDIF else if (s == "Prtc ") strtag = NDPRTC else if (s == "Prts ") strtag = NDPRTS else if (s == "Prti ") strtag = NDPRTI else if (s == "While ") strtag = NDWHIL else if (s == "Assign ") strtag = NDASGN else if (s == "Negate ") strtag = NDNEG else if (s == "Not ") strtag = NDNOT else if (s == "Multiply ") strtag = NDMUL else if (s == "Divide ") strtag = NDDIV else if (s == "Mod ") strtag = NDMOD else if (s == "Add ") strtag = NDADD else if (s == "Subtract ") strtag = NDSUB else if (s == "Less ") strtag = NDLT else if (s == "LessEqual ") strtag = NDLE else if (s == "Greater ") strtag = NDGT else if (s == "GreaterEqual ") strtag = NDGE else if (s == "Equal ") strtag = NDEQ else if (s == "NotEqual ") strtag = NDNE else if (s == "And ") strtag = NDAND else if (s == "Or ") strtag = NDOR else if (s == "; ") strtag = NIL else { write (, '(''unrecognized input line: '', A16)') s stop } end subroutine readln (strngs, istrng, tag, iarg, narg) # Read a line of the AST input. implicit none character strngs(STRNSZ) # String pool. integer istrng # String pool's next slot. integer tag # The node tag or NIL. integer iarg # Index of an argument in the string pool. 
integer narg # Length of an argument in the string pool. integer trimrt integer strtag integer skipsp integer skipns character line(LINESZ) character20 fmt integer i, j, n # Read a line of text as an array of characters. write (fmt, '(''('', I10, ''A)'')') LINESZ read (, fmt) line n = trimrt (line, LINESZ) i = skipsp (line, 1, n + 1) j = skipns (line, i, n + 1) tag = strtag (line, i, j - i) i = skipsp (line, j, n + 1) call addstr (strngs, istrng, line, i, (n + 1) - i, iarg, narg) end function hasarg (tag) implicit none integer tag logical hasarg hasarg = (tag == NDID \|\| tag == NDINT \|\| tag == NDSTR) end subroutine rdast (strngs, istrng, nodes, frelst, iast) # Read in the AST. A non-recursive algorithm is used. implicit none character strngs(STRNSZ) # String pool. integer istrng # String pool's next slot. integer nodes (NODESZ, NODSSZ) # Nodes pool. integer frelst # Head of the free list. integer iast # Index of root node of the AST. integer nstack integer pop logical hasarg integer stack(STCKSZ) integer sp # Stack pointer. integer tag, iarg, narg integer i, j, k sp = 1 call readln (strngs, istrng, tag, iarg, narg) if (tag == NIL) iast = NIL else { call newnod (nodes, frelst, i) iast = i nodes(NTAG, i) = tag nodes(NITV, i) = 0 nodes(NITN, i) = 0 if (hasarg (tag)) { nodes(NITV, i) = iarg nodes(NITN, i) = narg } else { call push (stack, sp, i + RGT) call push (stack, sp, i) while (nstack (sp) != 0) { j = pop (stack, sp) k = mod (j, RGT) call readln (strngs, istrng, tag, iarg, narg) if (tag == NIL) i = NIL else { call newnod (nodes, frelst, i) nodes(NTAG, i) = tag if (hasarg (tag)) { nodes(NITV, i) = iarg nodes(NITN, i) = narg } else { call push (stack, sp, i + RGT) call push (stack, sp, i) } } if (j == k) nodes(NLEFT, k) = i else nodes(NRIGHT, k) = i } } } end #--------------------------------------------------------------------- subroutine flushl (outbuf, noutbf) # Flush a line from the output buffer. implicit none character outbuf(OUTLSZ) # Output line buffer. 
integer noutbf # Number of characters in outbuf. character20 fmt integer i if (noutbf == 0) write (, '()') else { write (fmt, 1000) noutbf 1000 format ('(', I10, 'A)') write (, fmt) (outbuf(i), i = 1, noutbf) noutbf = 0 } end subroutine wrtchr (outbuf, noutbf, ch) # Write a character to output. implicit none character outbuf(OUTLSZ) # Output line buffer. integer noutbf # Number of characters in outbuf. character ch # The character to output. # This routine silently truncates anything that goes past the buffer # boundary. if (ch == char (NEWLIN)) call flushl (outbuf, noutbf) else if (noutbf < OUTLSZ) { noutbf = noutbf + 1 outbuf(noutbf) = ch } end subroutine wrtstr (outbuf, noutbf, str, i, n) # Write a substring to output. implicit none character outbuf(OUTLSZ) # Output line buffer. integer noutbf # Number of characters in outbuf. character str() # The string from which to output. integer i, n # Index and length of the substring. integer j for (j = 0; j < n; j = j + 1) call wrtchr (outbuf, noutbf, str(i + j)) end subroutine wrtint (outbuf, noutbf, ival, colcnt) # Write a non-negative integer to output. implicit none character outbuf(OUTLSZ) # Output line buffer. integer noutbf # Number of characters in outbuf. integer ival # The non-negative integer to print. integer colcnt # Column count, or zero for free format. integer skipsp character40 buf integer i, j write (buf, '(I40)') ival i = skipsp (buf, 1, 41) if (0 < colcnt) for (j = 1; j < colcnt - (40 - i); j = j + 1) call wrtchr (outbuf, noutbf, ' ') while (i <= 40) { call wrtchr (outbuf, noutbf, buf(i:i)) i = i + 1 } end #--------------------------------------------------------------------- define(VARSZ, 3) define(VNAMEI, 1) # Variable name's index in the string pool. define(VNAMEN, 2) # Length of the name. define(VVALUE, 3) # Variable's number in the VM's data pool. function fndvar (vars, numvar, strngs, istrng, i0, n0) implicit none integer vars(VARSZ, MAXVAR) # Variables. integer numvar # Number of variables. 
character strngs(STRNSZ) # String pool. integer istrng # String pool's next slot. integer i0, n0 # Index and length in the string pool. integer fndvar # The location of the variable. integer j, k integer i, n logical done1 logical done2 j = 1 done1 = .false. while (!done1) if (j == numvar + 1) done1 = .true. else if (n0 == vars(VNAMEN, j)) { k = 0 done2 = .false. while (!done2) if (n0 <= k) done2 = .true. else if (strngs(i0 + k) == strngs(vars(VNAMEI, j) + k)) k = k + 1 else done2 = .true. if (k < n0) j = j + 1 else { done2 = .true. done1 = .true. } } else j = j + 1 if (j == numvar + 1) { if (numvar == MAXVAR) { write (, '(''too many variables'')') stop } numvar = numvar + 1 call addstr (strngs, istrng, strngs, i0, n0, i, n) vars(VNAMEI, numvar) = i vars(VNAMEN, numvar) = n vars(VVALUE, numvar) = numvar - 1 fndvar = numvar } else fndvar = j end define(STRSZ, 3) define(STRI, 1) # String's index in this program's string pool. define(STRN, 2) # Length of the string. define(STRNO, 3) # String's number in the VM's string pool. function fndstr (strs, numstr, strngs, istrng, i0, n0) implicit none integer strs(STRSZ, MAXSTR) # Strings for the VM's string pool. integer numstr # Number of such strings. character strngs(STRNSZ) # String pool. integer istrng # String pool's next slot. integer i0, n0 # Index and length in the string pool. integer fndstr # The location of the string in the VM's string pool. integer j, k integer i, n logical done1 logical done2 j = 1 done1 = .false. while (!done1) if (j == numstr + 1) done1 = .true. else if (n0 == strs(STRN, j)) { k = 0 done2 = .false. while (!done2) if (n0 <= k) done2 = .true. else if (strngs(i0 + k) == strngs(strs(STRI, j) + k)) k = k + 1 else done2 = .true. if (k < n0) j = j + 1 else { done2 = .true. done1 = .true. 
} } else j = j + 1 if (j == numstr + 1) { if (numstr == MAXSTR) { write (, '(''too many string literals'')') stop } numstr = numstr + 1 call addstr (strngs, istrng, strngs, i0, n0, i, n) strs(STRI, numstr) = i strs(STRN, numstr) = n strs(STRNO, numstr) = numstr - 1 fndstr = numstr } else fndstr = j end function strint (strngs, i, n) # Convert a string to a non-negative integer. implicit none character strngs(STRNSZ) # String pool. integer i, n integer strint integer j strint = 0 for (j = 0; j < n; j = j + 1) strint = (10 * strint) + (ichar (strngs(i + j)) - ichar ('0')) end subroutine put1 (code, ncode, i, opcode) # Store a 1-byte operation. implicit none integer code(0 : CODESZ - 1) # Generated code. integer ncode # Number of VM bytes in the code. integer i # Address to put the code at. integer opcode if (CODESZ - i < 1) { write (, '(''address beyond the size of memory'')') stop } code(i) = opcode ncode = max (ncode, i + 1) end subroutine put5 (code, ncode, i, opcode, ival) # Store a 5-byte operation. implicit none integer code(0 : CODESZ - 1) # Generated code. integer ncode # Number of VM bytes in the code. integer i # Address to put the code at. integer opcode integer ival # Immediate integer value. if (CODESZ - i < 5) { write (, '(''address beyond the size of memory'')') stop } code(i) = opcode code(i + 1) = ival # Do not bother to break the integer into bytes. code(i + 2) = 0 code(i + 3) = 0 code(i + 4) = 0 ncode = max (ncode, i + 5) end subroutine compil (vars, numvar, _ strs, numstr, _ strngs, istrng, _ nodes, frelst, _ code, ncode, iast) # Compile the AST to virtual machine code. The algorithm employed is # non-recursive. implicit none integer vars(VARSZ, MAXVAR) # Variables. integer numvar # Number of variables. integer strs(STRSZ, MAXSTR) # Strings for the VM's string pool. integer numstr # Number of such strings. character strngs(STRNSZ) # String pool. integer istrng # String pool's next slot. integer nodes (NODESZ, NODSSZ) # Nodes pool. 
integer frelst # Head of the free list. integer code(0 : CODESZ - 1) # Generated code. integer ncode # Number of VM bytes in the code. integer iast # Root node of the AST. integer fndvar integer fndstr integer nstack integer pop integer strint integer xstack(STCKSZ) # Node stack. integer ixstck # Node stack pointer. integer i integer i0, n0 integer tag integer ivar integer inode1, inode2, inode3 integer addr1, addr2 ixstck = 1 call push (xstack, ixstck, iast) while (nstack (ixstck) != 0) { i = pop (xstack, ixstck) if (i == NIL) tag = NIL else tag = nodes(NTAG, i) if (tag == NIL) continue else if (tag < STAGE2) { if (tag == NDSEQ) { if (nodes(NRIGHT, i) != NIL) call push (xstack, ixstck, nodes(NRIGHT, i)) if (nodes(NLEFT, i) != NIL) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDID) { # Fetch the value of a variable. i0 = nodes(NITV, i) n0 = nodes(NITN, i) ivar = fndvar (vars, numvar, strngs, istrng, i0, n0) ivar = vars(VVALUE, ivar) call put5 (code, ncode, ncode, OPFTCH, ivar) } else if (tag == NDINT) { # Push the value of an integer literal. 
i0 = nodes(NITV, i) n0 = nodes(NITN, i) call put5 (code, ncode, ncode, OPPUSH, _ strint (strngs, i0, n0)) } else if (tag == NDNEG) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDNEG + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDNOT) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDNOT + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDAND) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDAND + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDOR) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDOR + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDADD) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDADD + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDSUB) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDSUB + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDMUL) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDMUL + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDDIV) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDDIV + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDMOD) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDMOD + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push 
(xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDLT) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDLT + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDLE) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDLE + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDGT) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDGT + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDGE) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDGE + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDEQ) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDEQ + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDNE) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDNE + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDASGN) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDASGN + STAGE2 nodes(NITV, inode1) = nodes(NITV, nodes(NLEFT, i)) nodes(NITN, inode1) = nodes(NITN, nodes(NLEFT, i)) call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NRIGHT, i)) } else if (tag == NDPRTS) { i0 = nodes(NITV, nodes(NLEFT, i)) n0 = nodes(NITN, nodes(NLEFT, i)) ivar = fndstr (strs, numstr, strngs, istrng, i0, n0) ivar = strs(STRNO, ivar) call put5 (code, ncode, ncode, OPPUSH, ivar) call put1 (code, ncode, ncode, OPPRTS) } else if (tag == NDPRTC) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = 
NDPRTC + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDPRTI) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDPRTI + STAGE2 call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDWHIL) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDWHIL + STAGE2 nodes(NLEFT, inode1) = nodes(NRIGHT, i) # Loop body. nodes(NRIGHT, inode1) = ncode # Addr. of top of loop. call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } else if (tag == NDIF) { call newnod (nodes, frelst, inode1) nodes(NTAG, inode1) = NDIF + STAGE2 # The "then" and "else" clauses, respectively: nodes(NLEFT, inode1) = nodes(NLEFT, nodes(NRIGHT, i)) nodes(NRIGHT, inode1) = nodes(NRIGHT, nodes(NRIGHT, i)) call push (xstack, ixstck, inode1) call push (xstack, ixstck, nodes(NLEFT, i)) } } else { if (tag == NDNEG + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPNEG) } else if (tag == NDNOT + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPNOT) } else if (tag == NDAND + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPAND) } else if (tag == NDOR + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPOR) } else if (tag == NDADD + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPADD) } else if (tag == NDSUB + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPSUB) } else if (tag == NDMUL + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPMUL) } else if (tag == NDDIV + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPDIV) } else if (tag == NDMOD + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPMOD) } else if (tag == NDLT + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPLT) } else if (tag == NDLE + STAGE2) { call frenod 
(nodes, frelst, i) call put1 (code, ncode, ncode, OPLE) } else if (tag == NDGT + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPGT) } else if (tag == NDGE + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPGE) } else if (tag == NDEQ + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPEQ) } else if (tag == NDNE + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPNE) } else if (tag == NDASGN + STAGE2) { i0 = nodes(NITV, i) n0 = nodes(NITN, i) call frenod (nodes, frelst, i) ivar = fndvar (vars, numvar, strngs, istrng, i0, n0) ivar = vars(VVALUE, ivar) call put5 (code, ncode, ncode, OPSTOR, ivar) } else if (tag == NDPRTC + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPPRTC) } else if (tag == NDPRTI + STAGE2) { call frenod (nodes, frelst, i) call put1 (code, ncode, ncode, OPPRTI) } else if (tag == NDWHIL + STAGE2) { inode1 = nodes(NLEFT, i) # Loop body. addr1 = nodes(NRIGHT, i) # Addr. of top of loop. call frenod (nodes, frelst, i) call put5 (code, ncode, ncode, OPJZ, 0) call newnod (nodes, frelst, inode2) nodes(NTAG, inode2) = NDWHIL + STAGE3 nodes(NLEFT, inode2) = addr1 # Top of loop. nodes(NRIGHT, inode2) = ncode - 4 # Fixup address. call push (xstack, ixstck, inode2) call push (xstack, ixstck, inode1) } else if (tag == NDWHIL + STAGE3) { addr1 = nodes(NLEFT, i) # Top of loop. addr2 = nodes(NRIGHT, i) # Fixup address. call frenod (nodes, frelst, i) call put5 (code, ncode, ncode, OPJMP, addr1) code(addr2) = ncode } else if (tag == NDIF + STAGE2) { inode1 = nodes(NLEFT, i) # "Then" clause. inode2 = nodes(NRIGHT, i) # "Else" clause. call frenod (nodes, frelst, i) call put5 (code, ncode, ncode, OPJZ, 0) call newnod (nodes, frelst, inode3) nodes(NTAG, inode3) = NDIF + STAGE3 nodes(NLEFT, inode3) = ncode - 4 # Fixup address. nodes(NRIGHT, inode3) = inode2 # "Else" clause. 
# ---- Tail of subroutine compil (its head precedes this span). ----
# These two pushes finish the NDIF + STAGE2 branch: the stage-3 node
# is pushed first so that it is popped after the "then" clause.
              call push (xstack, ixstck, inode3)
              call push (xstack, ixstck, inode1)
            }
          else if (tag == NDIF + STAGE3)
            {
              addr1 = nodes(NLEFT, i)   # Fixup address.
              inode1 = nodes(NRIGHT, i) # "Else" clause.
              call frenod (nodes, frelst, i)
              # Test the else-clause just reloaded from the stage-3
              # node.  inode2 is only a scratch local: compiling the
              # "then" clause (nested if/while handling) reassigns
              # it, so its value here is stale.
              if (inode1 == NIL)
                code(addr1) = ncode     # No "else": jz lands here.
              else
                {
                  # Jump over the "else" clause at the end of the
                  # "then" clause; the target is patched in stage 4.
                  call put5 (code, ncode, ncode, OPJMP, 0)
                  addr2 = ncode - 4     # Another fixup address.
                  code(addr1) = ncode   # jz lands at start of "else".
                  call newnod (nodes, frelst, inode2)
                  nodes(NTAG, inode2) = NDIF + STAGE4
                  nodes(NLEFT, inode2) = addr2
                  call push (xstack, ixstck, inode2)
                  call push (xstack, ixstck, inode1)
                }
            }
          else if (tag == NDIF + STAGE4)
            {
              addr1 = nodes(NLEFT, i)   # Fixup address.
              call frenod (nodes, frelst, i)
              code(addr1) = ncode       # Patch the jmp past "else".
            }
        }
    }
  call put1 (code, ncode, ncode, OPHALT)
end

function opname (opcode)

  # Return the mnemonic for a VM opcode as a character*8 value.
  # An opcode that matches none of the OP* constants is reported on
  # standard output and the program stops.

  implicit none

  integer opcode
  character*8 opname

  if (opcode == OPHALT)
    opname = 'halt '
  else if (opcode == OPADD)
    opname = 'add '
  else if (opcode == OPSUB)
    opname = 'sub '
  else if (opcode == OPMUL)
    opname = 'mul '
  else if (opcode == OPDIV)
    opname = 'div '
  else if (opcode == OPMOD)
    opname = 'mod '
  else if (opcode == OPLT)
    opname = 'lt '
  else if (opcode == OPGT)
    opname = 'gt '
  else if (opcode == OPLE)
    opname = 'le '
  else if (opcode == OPGE)
    opname = 'ge '
  else if (opcode == OPEQ)
    opname = 'eq '
  else if (opcode == OPNE)
    opname = 'ne '
  else if (opcode == OPAND)
    opname = 'and '
  else if (opcode == OPOR)
    opname = 'or '
  else if (opcode == OPNEG)
    opname = 'neg '
  else if (opcode == OPNOT)
    opname = 'not '
  else if (opcode == OPPRTC)
    opname = 'prtc '
  else if (opcode == OPPRTI)
    opname = 'prti '
  else if (opcode == OPPRTS)
    opname = 'prts '
  else if (opcode == OPFTCH)
    opname = 'fetch '
  else if (opcode == OPSTOR)
    opname = 'store '
  else if (opcode == OPPUSH)
    opname = 'push '
  else if (opcode == OPJMP)
    opname = 'jmp '
  else if (opcode == OPJZ)
    opname = 'jz '
  else
    {
      write (*, '(''Unrecognized opcode: '', I5)') opcode
      stop
    }
end

subroutine prprog (numvar, strs, numstr, strngs, istrng, _
                   code, ncode, outbuf, noutbf)

  # Print the compiled program: a "Datasize: N Strings: M" header,
  # one line per string-pool entry, then a listing of the code with
  # one instruction per line.  Output goes through the wrtstr,
  # wrtint and wrtchr helpers into outbuf; the caller flushes
  # whatever remains buffered.

  implicit none

  integer numvar               # Number of variables.
  integer strs(STRSZ, MAXSTR)  # Strings for the VM's string pool.
  integer numstr               # Number of such strings.
  character strngs(STRNSZ)     # String pool.
  integer istrng               # String pool's next slot.
  integer code(0 : CODESZ - 1) # Generated code.
  integer ncode                # Number of VM bytes in the code.
  character outbuf(OUTLSZ)     # Output line buffer.
  integer noutbf               # Number of characters in outbuf.

  character*8 opname

  integer i0, n0
  integer i, j
  integer opcode
  character*8 name

  character buf(20)

  # Header, first half: "Datasize: " followed by the variable count.
  buf(1) = 'D'
  buf(2) = 'a'
  buf(3) = 't'
  buf(4) = 'a'
  buf(5) = 's'
  buf(6) = 'i'
  buf(7) = 'z'
  buf(8) = 'e'
  buf(9) = ':'
  buf(10) = ' '
  call wrtstr (outbuf, noutbf, buf, 1, 10)
  call wrtint (outbuf, noutbf, numvar, 0)

  # Header, second half: " Strings: " followed by the string count.
  buf(1) = ' '
  buf(2) = 'S'
  buf(3) = 't'
  buf(4) = 'r'
  buf(5) = 'i'
  buf(6) = 'n'
  buf(7) = 'g'
  buf(8) = 's'
  buf(9) = ':'
  buf(10) = ' '
  call wrtstr (outbuf, noutbf, buf, 1, 10)
  call wrtint (outbuf, noutbf, numstr, 0)
  call wrtchr (outbuf, noutbf, char (NEWLIN))

  # The string pool entries, one per line, in index order.
  for (i = 1; i <= numstr; i = i + 1)
    {
      i0 = strs(STRI, i)
      n0 = strs(STRN, i)
      call wrtstr (outbuf, noutbf, strngs, i0, n0)
      call wrtchr (outbuf, noutbf, char (NEWLIN))
    }

  # The code listing.  i is the address of the current instruction.
  i = 0
  while (i != ncode)
    {
      opcode = code(i)
      name = opname (opcode)
      # Address column (the last argument is presumably a field
      # width -- confirm against wrtint), then two spaces.
      call wrtint (outbuf, noutbf, i, 10)
      for (j = 1; j <= 2; j = j + 1)
        call wrtchr (outbuf, noutbf, ' ')
      # Opcodes that carry an operand print all 8 characters of the
      # name; the others print only its non-blank characters.
      for (j = 1; j <= 8; j = j + 1)
        {
          if (opcode == OPFTCH _
                || opcode == OPSTOR _
                || opcode == OPPUSH _
                || opcode == OPJMP _
                || opcode == OPJZ)
            call wrtchr (outbuf, noutbf, name(j:j))
          else if (name(j:j) != ' ')
            call wrtchr (outbuf, noutbf, name(j:j))
        }
      if (opcode == OPPUSH)
        {
          # Operand printed bare.
          call wrtint (outbuf, noutbf, code(i + 1), 0)
          i = i + 5
        }
      else if (opcode == OPFTCH || opcode == OPSTOR)
        {
          # Operand printed in square brackets.
          call wrtchr (outbuf, noutbf, '[')
          call wrtint (outbuf, noutbf, code(i + 1), 0)
          call wrtchr (outbuf, noutbf, ']')
          i = i + 5
        }
      else if (opcode == OPJMP || opcode == OPJZ)
        {
          # Offset relative to the operand's own address, in
          # parentheses, followed by the stored target address.
          call wrtchr (outbuf, noutbf, '(')
          call wrtint (outbuf, noutbf, code(i + 1) - (i + 1), 0)
          call wrtchr (outbuf, noutbf, ')')
          call wrtchr (outbuf, noutbf, ' ')
          call wrtint (outbuf, noutbf, code(i + 1), 0)
          i = i + 5
        }
      else
        i = i + 1               # Instruction without an operand.
      call wrtchr (outbuf, noutbf, char (NEWLIN))
    }
end

#---------------------------------------------------------------------

program gen

  # Main program: initialize the node pool, read the AST, compile
  # it, print the resulting program, and flush any output still
  # held in the line buffer.

  implicit none

  integer vars(VARSZ, MAXVAR)  # Variables.
  integer numvar               # Number of variables.
  integer strs(STRSZ, MAXSTR)  # Strings for the VM's string pool.
  integer numstr               # Number of such strings.
  character strngs(STRNSZ)     # String pool.
  integer istrng               # String pool's next slot.
  integer nodes (NODESZ, NODSSZ) # Nodes pool.
  integer frelst               # Head of the free list.
  character outbuf(OUTLSZ)     # Output line buffer.
  integer noutbf               # Number of characters in outbuf.
  integer code(0 : CODESZ - 1) # Generated code.
  integer ncode                # Number of VM bytes in the code.
  integer iast                 # Root node of the AST.

  numvar = 0
  numstr = 0
  istrng = 1
  noutbf = 0
  ncode = 0

  call initnd (nodes, frelst)
  call rdast (strngs, istrng, nodes, frelst, iast)
  call compil (vars, numvar, strs, numstr, _
               strngs, istrng, nodes, frelst, _
               code, ncode, iast)
  call prprog (numvar, strs, numstr, strngs, istrng, _
               code, ncode, outbuf, noutbf)

  if (noutbf != 0)
    call flushl (outbuf, noutbf)
end

######################################################################</syntaxhighlight> {{out}} <pre>$ ratfor77 gen-in-ratfor.r > gen-in-ratfor.f && gfortran -fcheck=all -std=legacy -O2 gen-in-ratfor.f && ./a.out < compiler-tests/primes.ast Datasize: 5 Strings: 3 " is prime\n" "Total primes found: " "\n" 0 push 1 5 store [0] 10 push 1 15 store [1] 20 push 100 25 store [2] 30 fetch [1] 35 fetch [2] 40 lt 41 jz (160) 202 46 push 3 51 store [3] 56 push 1 61 store [4] 66 fetch [1] 71 push 2 76 add 77 store [1] 82 fetch [3] 87 fetch [3] 92 mul 93 fetch [1] 98 le 99 fetch [4] 104 and 105 jz (53) 159 110 fetch [1] 115 fetch [3] 120 div 121 fetch [3] 126 mul 127 fetch [1] 132 ne 133 store [4] 138 fetch [3] 143 push 2 148 add 149 store [3] 154 jmp (-73) 82 159 fetch [4] 164 jz (32) 197 169 fetch [1] 174 prti 175 push 0 180
prts 181 fetch [0] 186 push 1 191 add 192 store [0] 197 jmp (-168) 30 202 push 1 207 prts 208 fetch [0] 213 prti 214 push 2 219 prts 220 halt</pre> =={{header\|Scala}}== Line 5,345 ⟶ 9,758: The following code implements a code generator for the output of the [http://rosettacode.org/wiki/Compiler/syntax_analyzer#Scala parser]. <~~lang~~syntaxhighlight lang="scala"> package xyz.hyperreal.rosettacodeCompiler Line 5,501 ⟶ 9,914: } </syntaxhighlight> ~~</lang>~~ =={{header\|Scheme}}== <~~lang~~syntaxhighlight lang="scheme"> (import (scheme base) (scheme file) Line 5,693 ⟶ 10,106: (generate-code (read-code (cadr (command-line)))) (display "Error: pass an ast filename\n")) </syntaxhighlight> ~~</lang>~~ Tested on all examples in [[Compiler/Sample programs]]. Line 5,703 ⟶ 10,116: {{libheader\|Wren-fmt}} {{libheader\|Wren-ioutil}} <~~lang~~syntaxhighlight ~~ecmascript~~lang="wren">import "./dynamic" for Enum, Struct, Tuple import "./crypto" for Bytes import "./fmt" for Fmt import "./ioutil" for FileUtil var nodes = [ Line 6,047 ⟶ 10,460: codeGen.call(loadAst.call()) codeFinish.call() listCode.call()</~~lang~~syntaxhighlight> {{out}} Line 6,075 ⟶ 10,488: =={{header\|Zig}}== <~~lang~~syntaxhighlight lang="zig"> const std = @import("std"); Line 6,584 ⟶ 10,997: } } </syntaxhighlight> ~~</lang>~~ =={{header\|zkl}}== {{trans\|Python}} <~~lang~~syntaxhighlight lang="zkl">// This is a little endian machine const WORD_SIZE=4; Line 6,700 ⟶ 11,113: code.insert(0,66,text.len(),text); }) }</~~lang~~syntaxhighlight> <~~lang~~syntaxhighlight lang="zkl">fcn unasm(code){ all_ops,nthString := all_syms.pump(Dictionary(),"reverse"),-1; println("Datasize: %d bytes, Strings: %d bytes" Line 6,735 ⟶ 11,148: } } }</~~lang~~syntaxhighlight> <~~lang~~syntaxhighlight lang="zkl">fcn load_ast(file){ line:=file.readln().strip(); // one or two tokens if(line[0]==";") return(Void); Line 6,747 ⟶ 11,160: left,right := load_ast(file),load_ast(file); Node(type,Void,left,right) }</~~lang~~syntaxhighlight> 
<~~lang~~syntaxhighlight lang="zkl">ast:=load_ast(File(vm.nthArg(0))); code:=asm(ast,Data()); code_finish(code); unasm(code); File("code.bin","wb").write(code); println("Wrote %d bytes to code.bin".fmt(code.len()));</~~lang~~syntaxhighlight> File ast.txt is the text at the start of this task. {{out}}