rudus/src/compiler.rs

453 lines
15 KiB
Rust
Raw Normal View History

use crate::parser::Ast;
use crate::spans::Spanned;
use crate::value::*;
use chumsky::prelude::SimpleSpan;
use num_derive::{FromPrimitive, ToPrimitive};
use num_traits::FromPrimitive;
#[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)]
2024-12-16 04:28:57 +00:00
pub enum Op {
2024-12-23 00:07:42 +00:00
Nil,
True,
False,
Constant,
2024-12-16 04:28:57 +00:00
Jump,
JumpIfFalse,
2024-12-18 04:45:39 +00:00
Pop,
PushBinding,
Store,
Load,
2024-12-23 00:07:42 +00:00
ResetMatch,
MatchNil,
MatchTrue,
MatchFalse,
MatchWord,
PanicIfNoMatch,
MatchConstant,
MatchTuple,
PushTuple,
PushList,
PushDict,
PushBox,
GetKey,
PanicNoWhen,
2024-12-16 04:28:57 +00:00
}
impl std::fmt::Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use Op::*;
2024-12-23 00:07:42 +00:00
let rep = match self {
Nil => "nil",
True => "true",
False => "false",
Constant => "constant",
Jump => "jump",
JumpIfFalse => "jump_if_false",
Pop => "pop",
PushBinding => "push_binding",
Store => "store",
Load => "load",
MatchNil => "match_nil",
MatchTrue => "match_true",
MatchFalse => "match_false",
MatchWord => "match_word",
ResetMatch => "reset_match",
PanicIfNoMatch => "panic_if_no_match",
MatchConstant => "match_constant",
MatchTuple => "match_tuple",
PushTuple => "push_tuple",
PushList => "push_list",
PushDict => "push_dict",
PushBox => "push_box",
GetKey => "get_key",
PanicNoWhen => "panic_no_when",
};
write!(f, "{rep}")
2024-12-16 04:28:57 +00:00
}
}
/// A name the compiler has bound, together with the scope depth at which it
/// was bound.
///
/// Both fields are `Eq`, so the type derives `Eq` alongside `PartialEq`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Binding {
    /// The bound name. Pattern literals are recorded under synthetic names
    /// such as `"_"`, `"nil"` or `"*constant"`.
    name: &'static str,
    /// Value of the compiler's scope depth when the binding was created.
    depth: isize,
}
#[derive(Clone, Debug, PartialEq)]
pub struct Chunk<'a> {
2024-12-18 04:45:39 +00:00
pub bindings: Vec<Binding>,
scope_depth: isize,
num_bindings: usize,
2024-12-16 04:28:57 +00:00
pub constants: Vec<Value>,
pub bytecode: Vec<u8>,
pub spans: Vec<SimpleSpan>,
pub strings: Vec<&'static str>,
pub keywords: Vec<&'static str>,
pub nodes: Vec<&'a Ast>,
pub ast: &'a Ast,
pub span: SimpleSpan,
pub src: &'static str,
pub name: &'static str,
}
2024-12-18 06:28:23 +00:00
fn is_binding(expr: &Spanned<Ast>) -> bool {
let (ast, _) = expr;
use Ast::*;
match ast {
Let(..) | LBox(..) => true,
Fn(name, ..) => *name != "*anon",
_ => false,
}
}
impl<'a> Chunk<'a> {
2024-12-16 04:28:57 +00:00
/// Creates an empty chunk rooted at `ast`.
///
/// The keyword table is pre-seeded with the built-in type names; every other
/// table starts empty and the scope depth starts at -1.
pub fn new(ast: &'a Spanned<Ast>, name: &'static str, src: &'static str) -> Chunk<'a> {
    let (root, root_span) = ast;
    let builtin_keywords = vec![
        "nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn",
    ];
    Chunk {
        bindings: Vec::new(),
        scope_depth: -1,
        num_bindings: 0,
        constants: Vec::new(),
        bytecode: Vec::new(),
        spans: Vec::new(),
        strings: Vec::new(),
        keywords: builtin_keywords,
        nodes: Vec::new(),
        ast: root,
        span: *root_span,
        src,
        name,
    }
}
pub fn kw_from(&self, kw: &str) -> Option<Value> {
2024-12-23 00:07:42 +00:00
self.kw_index_from(kw).map(Value::Keyword)
}
pub fn kw_index_from(&self, kw: &str) -> Option<usize> {
self.keywords.iter().position(|s| *s == kw)
2024-12-16 04:28:57 +00:00
}
/// Compiles `node` as a child of the node currently being compiled.
///
/// The current node and span are swapped out for the duration of the call
/// and restored afterwards.
pub fn visit(&mut self, node: &'a Spanned<Ast>) {
    let saved_ast = std::mem::replace(&mut self.ast, &node.0);
    let saved_span = std::mem::replace(&mut self.span, node.1);
    self.compile();
    self.ast = saved_ast;
    self.span = saved_span;
}
2024-12-16 04:28:57 +00:00
/// Emits `Op::Constant` followed by the one-byte index of `val` in the
/// constant table.
///
/// An equal constant that is already in the table is reused (as
/// `match_constant` does), so repeated literals do not consume extra slots.
///
/// # Panics
///
/// Panics with an internal compiler error if a new constant would need an
/// index that does not fit in one byte.
fn emit_constant(&mut self, val: Value) {
    let constant_index = match self.constants.iter().position(|known| *known == val) {
        Some(idx) => idx,
        None => self.constants.len(),
    };
    if constant_index > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: too many constants in chunk:{}:: {}",
            self.span, self.ast
        )
    }
    // An index equal to the table length means the value was not found above.
    if constant_index == self.constants.len() {
        self.constants.push(val);
    }
    self.bytecode.push(Op::Constant as u8);
    self.spans.push(self.span);
    self.bytecode.push(constant_index as u8);
    self.spans.push(self.span);
}
2024-12-23 00:07:42 +00:00
/// Emits `Op::MatchConstant` followed by the one-byte index of `val` in the
/// constant table, reusing an equal existing constant when there is one, and
/// records a `"*constant"` binding.
///
/// # Panics
///
/// Panics with an internal compiler error if the index does not fit in one
/// byte.
fn match_constant(&mut self, val: Value) {
    let known = self.constants.iter().position(|v| *v == val);
    let constant_index = known.unwrap_or(self.constants.len());
    if constant_index > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: too many constants in chunk:{}:: {}",
            self.span, self.ast
        )
    }
    if known.is_none() {
        self.constants.push(val);
    }
    self.emit_op(Op::MatchConstant);
    self.bytecode.push(constant_index as u8);
    self.spans.push(self.span);
    self.bind("*constant");
}
/// Appends the byte for `op` to the bytecode and records the current span
/// for it.
fn emit_op(&mut self, op: Op) {
    let byte = op as u8;
    let span = self.span;
    self.spans.push(span);
    self.bytecode.push(byte);
}
2024-12-18 04:45:39 +00:00
/// Records `name` as bound at the current scope depth.
fn bind(&mut self, name: &'static str) {
    let depth = self.scope_depth;
    self.bindings.push(Binding { name, depth });
}
2024-12-16 04:28:57 +00:00
/// Compiles the AST node currently held in `self.ast`, appending to
/// `self.bytecode` and the supporting tables.
///
/// Child nodes are compiled through [`Chunk::visit`]. Every emitted byte,
/// opcode or operand, gets a matching entry in `self.spans`.
///
/// Jump operands are written exactly as follows (the VM that interprets them
/// is not part of this file):
/// * `JumpIfFalse` in `if`: distance from the `JumpIfFalse` opcode to the
///   `Jump` opcode that ends the `then` branch.
/// * `JumpIfFalse` in `when`: distance from the byte after its operand to the
///   start of the next clause.
/// * `Jump`: distance from the `Jump` opcode to the first byte after the
///   whole `if` / `when`.
///
/// # Panics
///
/// Panics with an internal compiler error when an operand (jump offset,
/// element count, binding index, constant index) does not fit in one byte,
/// when a word does not resolve to a known binding, or when a block is empty.
/// AST forms that are not implemented yet hit `todo!()`.
pub fn compile(&mut self) {
    use Ast::*;
    match self.ast {
        Nil => self.emit_op(Op::Nil),
        Number(n) => self.emit_constant(Value::Number(*n)),
        Boolean(b) => self.emit_op(if *b { Op::True } else { Op::False }),
        String(s) => {
            // Intern once; an already-known string is not pushed again.
            let str_index = self.intern_string(s);
            self.emit_constant(Value::Interned(str_index));
        }
        Keyword(s) => {
            let kw_index = self.intern_keyword(s);
            self.emit_constant(Value::Keyword(kw_index));
        }
        Block(lines) => {
            let (last, init) = match lines.split_last() {
                Some(parts) => parts,
                None => panic!(
                    "internal Ludus compiler error: empty block:{}:: {}",
                    self.span, self.ast
                ),
            };
            self.scope_depth += 1;
            for expr in init {
                self.visit(expr);
                // Binding forms are not followed by a `Pop`.
                if !is_binding(expr) {
                    self.emit_op(Op::Pop);
                }
            }
            self.visit(last);
            self.emit_op(Op::Store);
            self.scope_depth -= 1;
            // Drop every binding introduced inside the block, one `Pop` each.
            let outer_depth = self.scope_depth;
            while self
                .bindings
                .last()
                .map_or(false, |binding| binding.depth > outer_depth)
            {
                self.emit_op(Op::Pop);
                self.bindings.pop();
            }
            self.emit_op(Op::Load);
        }
        If(cond, then, r#else) => {
            self.visit(cond);
            let jif_idx = self.bytecode.len();
            self.emit_op(Op::JumpIfFalse);
            self.emit_operand(0xff); // placeholder, patched below
            self.visit(then);
            let jump_idx = self.bytecode.len();
            self.emit_op(Op::Jump);
            self.emit_operand(0xff); // placeholder, patched below
            self.visit(r#else);
            let end_idx = self.bytecode.len();
            self.patch_operand(jif_idx + 1, jump_idx - jif_idx);
            self.patch_operand(jump_idx + 1, end_idx - jump_idx);
        }
        Let(patt, expr) => {
            self.emit_op(Op::ResetMatch);
            self.visit(expr);
            self.visit(patt);
            self.emit_op(Op::PanicIfNoMatch);
        }
        WordPattern(name) => {
            self.emit_op(Op::MatchWord);
            self.bind(name);
        }
        Word(name) => {
            // Innermost binding wins, so search from the end.
            let slot = self
                .bindings
                .iter()
                .rposition(|binding| binding.name == *name);
            match slot {
                Some(index) => {
                    self.emit_op(Op::PushBinding);
                    self.emit_operand(index);
                }
                // Emitting the opcode without its operand would corrupt the
                // instruction stream, so fail loudly instead.
                None => panic!(
                    "internal Ludus compiler error: unbound name `{}`:{}:: {}",
                    name, self.span, self.ast
                ),
            }
        }
        PlaceholderPattern => {
            self.emit_op(Op::MatchWord);
            self.bind("_");
        }
        NilPattern => {
            self.emit_op(Op::MatchNil);
            self.bind("nil");
        }
        BooleanPattern(b) => {
            if *b {
                self.emit_op(Op::MatchTrue);
                self.bind("true");
            } else {
                self.emit_op(Op::MatchFalse);
                self.bind("false");
            }
        }
        NumberPattern(n) => {
            self.match_constant(Value::Number(*n));
        }
        KeywordPattern(s) => {
            let kw_index = self.intern_keyword(s);
            self.match_constant(Value::Keyword(kw_index));
        }
        StringPattern(s) => {
            let str_index = self.intern_string(s);
            self.match_constant(Value::Interned(str_index));
        }
        Tuple(members) => {
            for member in members {
                self.visit(member);
            }
            self.emit_op(Op::PushTuple);
            self.emit_operand(members.len());
        }
        List(members) => {
            for member in members {
                self.visit(member);
            }
            self.emit_op(Op::PushList);
            self.emit_operand(members.len());
        }
        LBox(name, expr) => {
            self.visit(expr);
            self.emit_op(Op::PushBox);
            self.bind(name);
        }
        Dict(pairs) => {
            for pair in pairs {
                self.visit(pair);
            }
            self.emit_op(Op::PushDict);
            self.emit_operand(pairs.len());
        }
        Pair(key, value) => {
            let kw_index = self.intern_keyword(key);
            self.emit_constant(Value::Keyword(kw_index));
            self.visit(value);
        }
        Synthetic(first, second, rest) => {
            match (&first.0, &second.0) {
                (Word(_), Keyword(_)) => {
                    self.visit(first);
                    self.visit(second);
                    self.emit_op(Op::GetKey);
                }
                (Keyword(_), Arguments(args)) => {
                    self.visit(&args[0]);
                    self.visit(first);
                    self.emit_op(Op::GetKey);
                }
                (Word(_), Arguments(_)) => {
                    todo!()
                }
                _ => unreachable!(),
            }
            // Further terms of a synthetic expression are not implemented.
            for _term in rest {
                todo!()
            }
        }
        When(clauses) => {
            let mut jump_idxes = vec![];
            let mut clauses = clauses.iter();
            while let Some((WhenClause(cond, body), _)) = clauses.next() {
                self.visit(cond.as_ref());
                self.emit_op(Op::JumpIfFalse);
                let jif_jump_idx = self.bytecode.len();
                self.emit_operand(0xff); // placeholder, patched below
                self.visit(body);
                self.emit_op(Op::Jump);
                jump_idxes.push(self.bytecode.len());
                self.emit_operand(0xff); // placeholder, patched after the loop
                // Offsets are computed in `usize` and range-checked once,
                // rather than subtracting already-truncated `u8` values.
                let next_clause = self.bytecode.len();
                self.patch_operand(jif_jump_idx, next_clause - jif_jump_idx - 1);
            }
            self.emit_op(Op::PanicNoWhen);
            let end = self.bytecode.len();
            for idx in jump_idxes {
                self.patch_operand(idx, end - idx + 1);
            }
        }
        _ => todo!(),
    }
}

/// Converts `operand` to the single byte used for instruction operands.
///
/// Panics with an internal compiler error if it does not fit.
fn operand_byte(&self, operand: usize) -> u8 {
    if operand > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: operand {} does not fit in one byte:{}:: {}",
            operand, self.span, self.ast
        )
    }
    operand as u8
}

/// Appends one operand byte and records the current span for it, keeping
/// `spans` in step with `bytecode`.
fn emit_operand(&mut self, operand: usize) {
    let byte = self.operand_byte(operand);
    self.bytecode.push(byte);
    self.spans.push(self.span);
}

/// Overwrites the previously emitted operand byte at index `at`.
fn patch_operand(&mut self, at: usize, operand: usize) {
    let byte = self.operand_byte(operand);
    self.bytecode[at] = byte;
}

/// Returns the index of `kw` in the keyword table, adding it if missing.
fn intern_keyword(&mut self, kw: &'static str) -> usize {
    match self.keywords.iter().position(|known| *known == kw) {
        Some(index) => index,
        None => {
            self.keywords.push(kw);
            self.keywords.len() - 1
        }
    }
}

/// Returns the index of `s` in the string table, adding it if missing.
fn intern_string(&mut self, s: &'static str) -> usize {
    match self.strings.iter().position(|known| *known == s) {
        Some(index) => index,
        None => {
            self.strings.push(s);
            self.strings.len() - 1
        }
    }
}
2024-12-16 04:28:57 +00:00
pub fn disassemble(&self) {
println!("=== chunk: {} ===", self.name);
2024-12-16 04:28:57 +00:00
println!("IDX | CODE | INFO");
let mut codes = self.bytecode.iter().enumerate();
while let Some((i, byte)) = codes.next() {
2024-12-16 04:28:57 +00:00
let op = Op::from_u8(*byte).unwrap();
use Op::*;
match op {
2024-12-23 00:07:42 +00:00
Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse
| MatchWord | ResetMatch | PanicIfNoMatch | GetKey | PanicNoWhen => {
println!("{i:04}: {op}")
}
Constant | MatchConstant => {
2024-12-16 04:28:57 +00:00
let (_, next) = codes.next().unwrap();
let value = &self.constants[*next as usize].show(self);
println!("{i:04}: {:16} {next:04}: {value}", op.to_string());
}
2024-12-23 00:07:42 +00:00
PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox => {
2024-12-18 04:45:39 +00:00
let (_, next) = codes.next().unwrap();
println!("{i:04}: {:16} {next:04}", op.to_string());
}
2024-12-16 04:28:57 +00:00
Jump | JumpIfFalse => {
let (_, next) = codes.next().unwrap();
2024-12-16 04:28:57 +00:00
println!("{i:04}: {:16} {next:04}", op.to_string())
}
}
}
}
2024-12-18 06:28:23 +00:00
pub fn dissasemble_instr(&self, i: usize) {
let op = Op::from_u8(self.bytecode[i]).unwrap();
use Op::*;
match op {
2024-12-23 00:07:42 +00:00
Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse
| PanicIfNoMatch | MatchWord | ResetMatch | GetKey | PanicNoWhen => {
println!("{i:04}: {op}")
}
Constant | MatchConstant => {
2024-12-18 06:28:23 +00:00
let next = self.bytecode[i + 1];
let value = &self.constants[next as usize].show(self);
println!("{i:04}: {:16} {next:04}: {value}", op.to_string());
}
2024-12-23 00:07:42 +00:00
PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox => {
2024-12-18 06:28:23 +00:00
let next = self.bytecode[i + 1];
println!("{i:04}: {:16} {next:04}", op.to_string());
}
Jump | JumpIfFalse => {
let next = self.bytecode[i + 1];
println!("{i:04}: {:16} {next:04}", op.to_string())
}
}
}
}