Compiler/code generator: Difference between revisions

Content added Content deleted
(Added Wren)
(Add Zig language implementation)
Line 5,899: Line 5,899:
65 halt
65 halt
</pre>
</pre>

=={{header|Zig}}==
<lang zig>
const std = @import("std");

pub const CodeGeneratorError = error{OutOfMemory};

pub const CodeGenerator = struct {
allocator: *std.mem.Allocator,
string_pool: std.ArrayList([]const u8),
globals: std.ArrayList([]const u8),
bytecode: std.ArrayList(u8),

const Self = @This();
const word_size = @sizeOf(i32);

pub fn init(
allocator: *std.mem.Allocator,
string_pool: std.ArrayList([]const u8),
globals: std.ArrayList([]const u8),
) Self {
return CodeGenerator{
.allocator = allocator,
.string_pool = string_pool,
.globals = globals,
.bytecode = std.ArrayList(u8).init(allocator),
};
}

pub fn gen(self: *Self, ast: ?*Tree) CodeGeneratorError!void {
try self.genH(ast);
try self.emitHalt();
}

// Helper function to allow recursion.
pub fn genH(self: *Self, ast: ?*Tree) CodeGeneratorError!void {
if (ast) |t| {
switch (t.typ) {
.sequence => {
try self.genH(t.left);
try self.genH(t.right);
},
.kw_while => {
const condition_address = self.currentAddress();
try self.genH(t.left);
try self.emitByte(.jz);
const condition_address_hole = self.currentAddress();
try self.emitHole();
try self.genH(t.right);
try self.emitByte(.jmp);
try self.emitInt(condition_address);
self.insertInt(condition_address_hole, self.currentAddress());
},
.kw_if => {
try self.genH(t.left);
try self.emitByte(.jz);
const condition_address_hole = self.currentAddress();
try self.emitHole();
try self.genH(t.right.?.left);
if (t.right.?.right) |else_tree| {
try self.emitByte(.jmp);
const else_address_hole = self.currentAddress();
try self.emitHole();
const else_address = self.currentAddress();
try self.genH(else_tree);
self.insertInt(condition_address_hole, else_address);
self.insertInt(else_address_hole, self.currentAddress());
} else {
self.insertInt(condition_address_hole, self.currentAddress());
}
},
.assign => {
try self.genH(t.right);
try self.emitByte(.store);
try self.emitInt(self.fetchGlobalsOffset(t.left.?.value.?.string));
},
.prts => {
try self.genH(t.left);
try self.emitByte(.prts);
},
.prti => {
try self.genH(t.left);
try self.emitByte(.prti);
},
.prtc => {
try self.genH(t.left);
try self.emitByte(.prtc);
},
.string => {
try self.emitByte(.push);
try self.emitInt(self.fetchStringsOffset(t.value.?.string));
},
.integer => {
try self.emitByte(.push);
try self.emitInt(t.value.?.integer);
},
.identifier => {
try self.emitByte(.fetch);
try self.emitInt(self.fetchGlobalsOffset(t.value.?.string));
},
.negate, .not => {
try self.genH(t.left);
try self.emitByte(Op.fromNodeType(t.typ).?);
},
.add,
.multiply,
.subtract,
.divide,
.mod,
.less,
.less_equal,
.greater,
.greater_equal,
.equal,
.not_equal,
.bool_and,
.bool_or,
=> try self.genBinOp(t),
.unknown => {
std.debug.print("\nINTERP: UNKNOWN {}\n", .{t.typ});
std.os.exit(1);
},
}
}
}

fn genBinOp(self: *Self, tree: *Tree) CodeGeneratorError!void {
try self.genH(tree.left);
try self.genH(tree.right);
try self.emitByte(Op.fromNodeType(tree.typ).?);
}

fn emitByte(self: *Self, op: Op) CodeGeneratorError!void {
try self.bytecode.append(@enumToInt(op));
}

fn emitInt(self: *Self, n: i32) CodeGeneratorError!void {
var n_var = n;
var n_bytes = @ptrCast(*[4]u8, &n_var);
for (n_bytes) |byte| {
try self.bytecode.append(byte);
}
}

// Holes are later populated via `insertInt` because they can't be known when
// we populate the bytecode array sequentially.
fn emitHole(self: *Self) CodeGeneratorError!void {
try self.emitInt(std.math.maxInt(i32));
}

// Populates the "hole" produced by `emitHole`.
fn insertInt(self: *Self, address: i32, n: i32) void {
var i: i32 = 0;
var n_var = n;
var n_bytes = @ptrCast(*[4]u8, &n_var);
while (i < word_size) : (i += 1) {
self.bytecode.items[@intCast(usize, address + i)] = n_bytes[@intCast(usize, i)];
}
}

fn emitHalt(self: *Self) CodeGeneratorError!void {
try self.bytecode.append(@enumToInt(Op.halt));
}

fn currentAddress(self: Self) i32 {
return @intCast(i32, self.bytecode.items.len);
}

fn fetchStringsOffset(self: Self, str: []const u8) i32 {
for (self.string_pool.items) |string, idx| {
if (std.mem.eql(u8, string, str)) {
return @intCast(i32, idx);
}
}
unreachable;
}

fn fetchGlobalsOffset(self: Self, str: []const u8) i32 {
for (self.globals.items) |global, idx| {
if (std.mem.eql(u8, global, str)) {
return @intCast(i32, idx);
}
}
unreachable;
}

pub fn print(self: Self) ![]u8 {
var result = std.ArrayList(u8).init(self.allocator);
var writer = result.writer();
try writer.print(
"Datasize: {d} Strings: {d}\n",
.{ self.globals.items.len, self.string_pool.items.len },
);
for (self.string_pool.items) |string| {
try writer.print("{s}\n", .{string});
}

var pc: usize = 0;
while (pc < self.bytecode.items.len) : (pc += 1) {
try writer.print("{d:>5} ", .{pc});
switch (@intToEnum(Op, self.bytecode.items[pc])) {
.push => {
try writer.print("push {d}\n", .{self.unpackInt(pc + 1)});
pc += word_size;
},
.store => {
try writer.print("store [{d}]\n", .{self.unpackInt(pc + 1)});
pc += word_size;
},
.fetch => {
try writer.print("fetch [{d}]\n", .{self.unpackInt(pc + 1)});
pc += word_size;
},
.jz => {
const address = self.unpackInt(pc + 1);
try writer.print("jz ({d}) {d}\n", .{ address - @intCast(i32, pc) - 1, address });
pc += word_size;
},
.jmp => {
const address = self.unpackInt(pc + 1);
try writer.print("jmp ({d}) {d}\n", .{ address - @intCast(i32, pc) - 1, address });
pc += word_size;
},
else => try writer.print("{s}\n", .{Op.toString(@intToEnum(Op, self.bytecode.items[pc]))}),
}
}

return result.items;
}

fn unpackInt(self: Self, pc: usize) i32 {
const arg_ptr = @ptrCast(*[4]u8, self.bytecode.items[pc .. pc + word_size]);
var arg_array = arg_ptr.*;
const arg = @ptrCast(*i32, @alignCast(@alignOf(i32), &arg_array));
return arg.*;
}
};

pub const Op = enum(u8) {
fetch,
store,
push,
add,
sub,
mul,
div,
mod,
lt,
gt,
le,
ge,
eq,
ne,
@"and",
@"or",
neg,
not,
jmp,
jz,
prtc,
prts,
prti,
halt,

const from_node = std.enums.directEnumArray(NodeType, ?Op, 0, .{
.unknown = null,
.identifier = null,
.string = null,
.integer = null,
.sequence = null,
.kw_if = null,
.prtc = null,
.prts = null,
.prti = null,
.kw_while = null,
.assign = null,
.negate = .neg,
.not = .not,
.multiply = .mul,
.divide = .div,
.mod = .mod,
.add = .add,
.subtract = .sub,
.less = .lt,
.less_equal = .le,
.greater = .gt,
.greater_equal = .ge,
.equal = .eq,
.not_equal = .ne,
.bool_and = .@"and",
.bool_or = .@"or",
});

pub fn fromNodeType(node_type: NodeType) ?Op {
return from_node[@enumToInt(node_type)];
}

const to_string = std.enums.directEnumArray(Op, []const u8, 0, .{
.fetch = "fetch",
.store = "store",
.push = "push",
.add = "add",
.sub = "sub",
.mul = "mul",
.div = "div",
.mod = "mod",
.lt = "lt",
.gt = "gt",
.le = "le",
.ge = "ge",
.eq = "eq",
.ne = "ne",
.@"and" = "and",
.@"or" = "or",
.neg = "neg",
.not = "not",
.jmp = "jmp",
.jz = "jz",
.prtc = "prtc",
.prts = "prts",
.prti = "prti",
.halt = "halt",
});

pub fn toString(self: Op) []const u8 {
return to_string[@enumToInt(self)];
}
};

pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
var allocator = &arena.allocator;

var arg_it = std.process.args();
_ = try arg_it.next(allocator) orelse unreachable; // program name
const file_name = arg_it.next(allocator);
// We accept both files and standard input.
var file_handle = blk: {
if (file_name) |file_name_delimited| {
const fname: []const u8 = try file_name_delimited;
break :blk try std.fs.cwd().openFile(fname, .{});
} else {
break :blk std.io.getStdIn();
}
};
defer file_handle.close();
const input_content = try file_handle.readToEndAlloc(allocator, std.math.maxInt(usize));

var string_pool = std.ArrayList([]const u8).init(allocator);
var globals = std.ArrayList([]const u8).init(allocator);
const ast = try loadAST(allocator, input_content, &string_pool, &globals);
var code_generator = CodeGenerator.init(allocator, string_pool, globals);
try code_generator.gen(ast);
const result: []const u8 = try code_generator.print();
_ = try std.io.getStdOut().write(result);
}

pub const NodeType = enum {
unknown,
identifier,
string,
integer,
sequence,
kw_if,
prtc,
prts,
prti,
kw_while,
assign,
negate,
not,
multiply,
divide,
mod,
add,
subtract,
less,
less_equal,
greater,
greater_equal,
equal,
not_equal,
bool_and,
bool_or,

const from_string_map = std.ComptimeStringMap(NodeType, .{
.{ "UNKNOWN", .unknown },
.{ "Identifier", .identifier },
.{ "String", .string },
.{ "Integer", .integer },
.{ "Sequence", .sequence },
.{ "If", .kw_if },
.{ "Prtc", .prtc },
.{ "Prts", .prts },
.{ "Prti", .prti },
.{ "While", .kw_while },
.{ "Assign", .assign },
.{ "Negate", .negate },
.{ "Not", .not },
.{ "Multiply", .multiply },
.{ "Divide", .divide },
.{ "Mod", .mod },
.{ "Add", .add },
.{ "Subtract", .subtract },
.{ "Less", .less },
.{ "LessEqual", .less_equal },
.{ "Greater", .greater },
.{ "GreaterEqual", .greater_equal },
.{ "Equal", .equal },
.{ "NotEqual", .not_equal },
.{ "And", .bool_and },
.{ "Or", .bool_or },
});

pub fn fromString(str: []const u8) NodeType {
return from_string_map.get(str).?;
}
};

pub const NodeValue = union(enum) {
integer: i32,
string: []const u8,
};

pub const Tree = struct {
left: ?*Tree,
right: ?*Tree,
typ: NodeType = .unknown,
value: ?NodeValue = null,

fn makeNode(allocator: *std.mem.Allocator, typ: NodeType, left: ?*Tree, right: ?*Tree) !*Tree {
const result = try allocator.create(Tree);
result.* = Tree{ .left = left, .right = right, .typ = typ };
return result;
}

fn makeLeaf(allocator: *std.mem.Allocator, typ: NodeType, value: ?NodeValue) !*Tree {
const result = try allocator.create(Tree);
result.* = Tree{ .left = null, .right = null, .typ = typ, .value = value };
return result;
}
};

const LoadASTError = error{OutOfMemory} || std.fmt.ParseIntError;

fn loadAST(
allocator: *std.mem.Allocator,
str: []const u8,
string_pool: *std.ArrayList([]const u8),
globals: *std.ArrayList([]const u8),
) LoadASTError!?*Tree {
var line_it = std.mem.split(str, "\n");
return try loadASTHelper(allocator, &line_it, string_pool, globals);
}

fn loadASTHelper(
allocator: *std.mem.Allocator,
line_it: *std.mem.SplitIterator,
string_pool: *std.ArrayList([]const u8),
globals: *std.ArrayList([]const u8),
) LoadASTError!?*Tree {
if (line_it.next()) |line| {
var tok_it = std.mem.tokenize(line, " ");
const tok_str = tok_it.next().?;
if (tok_str[0] == ';') return null;

const node_type = NodeType.fromString(tok_str);
const pre_iteration_index = tok_it.index;

if (tok_it.next()) |leaf_value| {
const node_value = blk: {
switch (node_type) {
.integer => break :blk NodeValue{ .integer = try std.fmt.parseInt(i32, leaf_value, 10) },
.identifier => {
var already_exists = false;
for (globals.items) |global| {
if (std.mem.eql(u8, global, leaf_value)) {
already_exists = true;
break;
}
}
if (!already_exists) try globals.append(leaf_value);
break :blk NodeValue{ .string = leaf_value };
},
.string => {
tok_it.index = pre_iteration_index;
const str = tok_it.rest();
var already_exists = false;
for (string_pool.items) |string| {
if (std.mem.eql(u8, string, str)) {
already_exists = true;
break;
}
}
if (!already_exists) try string_pool.append(str);
break :blk NodeValue{ .string = str };
},
else => unreachable,
}
};
return try Tree.makeLeaf(allocator, node_type, node_value);
}

const left = try loadASTHelper(allocator, line_it, string_pool, globals);
const right = try loadASTHelper(allocator, line_it, string_pool, globals);
return try Tree.makeNode(allocator, node_type, left, right);
} else {
return null;
}
}

fn squishSpaces(allocator: *std.mem.Allocator, str: []const u8) ![]u8 {
var result = std.ArrayList(u8).init(allocator);
var was_space = false;
for (str) |ch| {
switch (ch) {
' ' => {
if (!was_space) {
was_space = true;
try result.append(ch);
}
},
else => {
was_space = false;
try result.append(ch);
},
}
}
return result.items;
}
</lang>


=={{header|zkl}}==
=={{header|zkl}}==