rudus/src/compiler.rs
2024-12-27 00:54:31 -05:00

689 lines
24 KiB
Rust

use crate::parser::Ast;
use crate::spans::Spanned;
use crate::value::*;
use chumsky::prelude::SimpleSpan;
use num_derive::{FromPrimitive, ToPrimitive};
use num_traits::FromPrimitive;
use std::cell::OnceCell;
use std::rc::Rc;
/// Bytecode instruction set emitted by the compiler.
///
/// Each opcode is written to a chunk as a single byte (`op as u8`) and decoded
/// again with `Op::from_u8`, so the declaration order of the variants below
/// determines the numeric encoding; reordering variants changes the byte values.
///
/// Going by the disassemblers in this file, `Constant` and `MatchConstant` are
/// followed by a one-byte index into the chunk's constant table; `PushBinding`,
/// `MatchTuple`, `PushTuple`, `PushList`, `PushDict`, `PushBox`, `MatchDepth`
/// and the jump opcodes (`Jump`, `JumpIfFalse`, `JumpIfNoMatch`, `JumpBack`,
/// `JumpIfZero`) are followed by a one-byte operand; all other opcodes stand alone.
#[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)]
pub enum Op {
Nil,
True,
False,
Constant,
Jump,
JumpIfFalse,
Pop,
PushBinding,
Store,
Load,
ResetMatch,
MatchNil,
MatchTrue,
MatchFalse,
MatchWord,
PanicIfNoMatch,
MatchConstant,
MatchTuple,
PushTuple,
PushList,
PushDict,
PushBox,
GetKey,
PanicNoWhen,
JumpIfNoMatch,
PanicNoMatch,
TypeOf,
JumpBack,
JumpIfZero,
Duplicate,
Decrement,
Truncate,
MatchDepth,
}
impl std::fmt::Display for Op {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use Op::*;
let rep = match self {
Nil => "nil",
True => "true",
False => "false",
Constant => "constant",
Jump => "jump",
JumpIfFalse => "jump_if_false",
Pop => "pop",
PushBinding => "push_binding",
Store => "store",
Load => "load",
MatchNil => "match_nil",
MatchTrue => "match_true",
MatchFalse => "match_false",
MatchWord => "match_word",
ResetMatch => "reset_match",
PanicIfNoMatch => "panic_if_no_match",
MatchConstant => "match_constant",
MatchTuple => "match_tuple",
PushTuple => "push_tuple",
PushList => "push_list",
PushDict => "push_dict",
PushBox => "push_box",
GetKey => "get_key",
PanicNoWhen => "panic_no_when",
JumpIfNoMatch => "jump_if_no_match",
PanicNoMatch => "panic_no_match",
TypeOf => "type_of",
JumpBack => "jump_back",
JumpIfZero => "jump_if_zero",
Decrement => "decrement",
Truncate => "truncate",
Duplicate => "duplicate",
MatchDepth => "match_depth",
};
write!(f, "{rep}")
}
}
/// A compile-time record of one slot introduced by a pattern, box or function.
///
/// The compiler keeps these in a stack; the position of a binding in that stack
/// is what gets emitted as the operand of `PushBinding`.
#[derive(Debug, Clone, PartialEq)]
pub struct Binding {
    /// Source name of the binding; the empty string is used for slots created
    /// by patterns that do not introduce a name.
    name: &'static str,
    /// The compiler's scope depth at the moment the binding was created.
    depth: isize,
}
/// A unit of compiled code together with the tables its bytecode indexes into.
#[derive(Debug, Clone, PartialEq)]
pub struct Chunk {
    /// Constant table; `Constant` and `MatchConstant` operands index into it.
    pub constants: Vec<Value>,
    /// Flat instruction stream: opcodes interleaved with their one-byte operands.
    pub bytecode: Vec<u8>,
    /// String table; `Value::Interned` constants carry an index into it.
    pub strings: Vec<&'static str>,
    /// Keyword table; `Value::Keyword` constants carry an index into it.
    pub keywords: Vec<&'static str>,
}
impl Chunk {
    /// Prints a one-line disassembly of the instruction whose opcode sits at
    /// bytecode index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` (or the operand position `i + 1`) is out of bounds, if the
    /// byte at `i` is not a valid opcode, or if a constant operand points past
    /// the constant table.
    pub fn dissasemble_instr(&self, i: usize) {
        use Op::*;
        let op = Op::from_u8(self.bytecode[i]).unwrap();
        match op {
            // Opcodes whose operand is an index into the constant table.
            Constant | MatchConstant => {
                let next = self.bytecode[i + 1];
                let value = &self.constants[next as usize].show(self);
                println!("{i:04}: {:16} {next:04}: {value}", op.to_string());
            }
            // Opcodes with a plain one-byte operand.
            PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump
            | JumpIfFalse | JumpIfNoMatch | JumpBack | JumpIfZero | MatchDepth => {
                let next = self.bytecode[i + 1];
                println!("{i:04}: {:16} {next:04}", op.to_string());
            }
            // Opcodes without an operand.
            Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse
            | PanicIfNoMatch | MatchWord | ResetMatch | GetKey | PanicNoWhen | PanicNoMatch
            | TypeOf | Duplicate | Decrement | Truncate => {
                println!("{i:04}: {op}")
            }
        }
    }

    /// Returns the keyword value for `kw`, or `None` if `kw` is not in the
    /// keyword table.
    pub fn kw_from(&self, kw: &str) -> Option<Value> {
        let index = self.kw_index_from(kw)?;
        Some(Value::Keyword(index))
    }

    /// Returns the position of `kw` in the keyword table, if present.
    pub fn kw_index_from(&self, kw: &str) -> Option<usize> {
        self.keywords.iter().position(|&candidate| candidate == kw)
    }
}
/// Single-pass compiler that walks an `Ast` and appends bytecode to a `Chunk`.
pub struct Compiler {
// The chunk under construction (bytecode plus constant/string/keyword tables).
pub chunk: Chunk,
// Stack of live bindings; a binding's index here is the `PushBinding` operand.
pub bindings: Vec<Binding>,
// Current lexical nesting level; starts at -1 and is bumped when a block or
// match clause is entered.
scope_depth: isize,
// Initialised to 0 by `new`; not read or updated elsewhere in this file.
num_bindings: usize,
// Source span recorded for every byte pushed onto `chunk.bytecode`.
pub spans: Vec<SimpleSpan>,
// Initialised empty by `new`; not used elsewhere in this file.
pub nodes: Vec<&'static Ast>,
// The node currently being compiled; `visit` swaps it for the duration of a
// child compilation and restores it afterwards.
pub ast: &'static Ast,
// Span of the node currently being compiled (swapped together with `ast`).
pub span: SimpleSpan,
// Source text handed to `new`; forwarded to nested function compilers.
pub src: &'static str,
// Name printed in the disassembly header.
pub name: &'static str,
// Stack of bytecode offsets pushed by `enter_loop` and popped by `leave_loop`.
loop_idxes: Vec<usize>,
}
fn is_binding(expr: &Spanned<Ast>) -> bool {
let (ast, _) = expr;
use Ast::*;
match ast {
Let(..) | LBox(..) => true,
Fn(name, ..) => !name.is_empty(),
_ => false,
}
}
impl Compiler {
/// Creates a compiler positioned at the root node `ast`.
///
/// The keyword table is pre-seeded with the ten type keywords in a fixed order,
/// so their indices are the same in every chunk.
pub fn new(ast: &'static Spanned<Ast>, name: &'static str, src: &'static str) -> Compiler {
    let (root, root_span) = ast;
    let keywords = vec![
        "nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn",
    ];
    Compiler {
        chunk: Chunk {
            constants: Vec::new(),
            bytecode: Vec::new(),
            strings: Vec::new(),
            keywords,
        },
        bindings: Vec::new(),
        // -1 so that entering the first scope brings the depth to 0.
        scope_depth: -1,
        num_bindings: 0,
        spans: Vec::new(),
        nodes: Vec::new(),
        ast: root,
        span: *root_span,
        loop_idxes: Vec::new(),
        src,
        name,
    }
}
pub fn kw_from(&self, kw: &str) -> Option<Value> {
self.kw_index_from(kw).map(Value::Keyword)
}
/// Returns the position of `kw` in this compiler's chunk keyword table, if present.
pub fn kw_index_from(&self, kw: &str) -> Option<usize> {
    self.chunk.kw_index_from(kw)
}
/// Compiles the child `node`, then restores the compiler's current node and span.
///
/// `compile` always works on `self.ast` / `self.span`, so the child is swapped
/// in for the duration of the call and the previous values are put back after.
pub fn visit(&mut self, node: &'static Spanned<Ast>) {
    let saved_ast = std::mem::replace(&mut self.ast, &node.0);
    let saved_span = std::mem::replace(&mut self.span, node.1);
    self.compile();
    self.ast = saved_ast;
    self.span = saved_span;
}
/// Appends `val` to the constant table and emits `Constant <index>`.
///
/// # Panics
///
/// Panics if the new constant's index would not fit in the one-byte operand.
fn emit_constant(&mut self, val: Value) {
    let slot = self.chunk.constants.len();
    if slot > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: too many constants in chunk:{}:: {}",
            self.span, self.ast
        )
    }
    self.chunk.constants.push(val);
    // Opcode and operand are each recorded with the current span.
    self.emit_op(Op::Constant);
    self.emit_byte(slot);
}
/// Emits `MatchConstant <index>` for `val` and registers an anonymous binding.
///
/// An equal constant already in the table is reused; otherwise `val` is appended.
///
/// # Panics
///
/// Panics if the constant's index would not fit in the one-byte operand.
fn match_constant(&mut self, val: Value) {
    let next_slot = self.chunk.constants.len();
    let slot = self
        .chunk
        .constants
        .iter()
        .position(|existing| *existing == val)
        .unwrap_or(next_slot);
    if slot > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: too many constants in chunk:{}:: {}",
            self.span, self.ast
        )
    }
    if slot == next_slot {
        self.chunk.constants.push(val);
    }
    self.emit_op(Op::MatchConstant);
    self.emit_byte(slot);
    // The pattern still gets a (nameless) entry on the binding stack.
    self.bind("");
}
/// Appends the opcode `op` to the bytecode and records the current span for it.
fn emit_op(&mut self, op: Op) {
    let opcode = op as u8;
    self.chunk.bytecode.push(opcode);
    self.spans.push(self.span);
}
/// Appends a one-byte operand to the bytecode and records the current span for it.
///
/// # Panics
///
/// Panics if `byte` is larger than `u8::MAX`. Operands are a single byte wide,
/// so a larger value (a binding slot, element count or jump distance) cannot be
/// encoded; failing loudly here avoids emitting a silently truncated operand.
fn emit_byte(&mut self, byte: usize) {
    if byte > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: operand {} does not fit in one byte:{}:: {}",
            byte, self.span, self.ast
        )
    }
    self.chunk.bytecode.push(byte as u8);
    self.spans.push(self.span);
}
/// Number of bytes emitted so far, i.e. the index the next emitted byte will get.
fn len(&self) -> usize {
    let code = &self.chunk.bytecode;
    code.len()
}
/// Pushes a binding called `name` onto the binding stack at the current scope depth.
fn bind(&mut self, name: &'static str) {
    let depth = self.scope_depth;
    self.bindings.push(Binding { name, depth });
}
/// Records the current bytecode offset as the start of a new innermost loop.
fn enter_loop(&mut self) {
    let loop_start = self.len();
    self.loop_idxes.push(loop_start);
}
/// Discards the innermost recorded loop start; does nothing if none is recorded.
fn leave_loop(&mut self) {
    let _ = self.loop_idxes.pop();
}
/// Returns the bytecode offset recorded by the innermost `enter_loop`.
///
/// Takes `&self` because it only reads the loop stack.
///
/// # Panics
///
/// Panics if no loop is currently recorded.
fn loop_idx(&self) -> usize {
    *self
        .loop_idxes
        .last()
        .expect("internal Ludus compiler error: loop_idx called outside of a loop")
}
/// Compiles the node currently held in `self.ast`, appending its bytecode to
/// `self.chunk`.
///
/// Child nodes are compiled recursively through [`Compiler::visit`]. Several
/// node kinds are not implemented yet and hit `todo!()`.
///
/// # Panics
///
/// Panics with an internal compiler error when a word does not resolve to a
/// known binding, when a block is empty, or when an operand or jump distance
/// does not fit in one byte. Also panics on `Ast::Error`, on clause nodes
/// visited outside their parent, and on unimplemented node kinds.
pub fn compile(&mut self) {
    use Ast::*;
    match self.ast {
        Error => unreachable!(),
        Nil => self.emit_op(Op::Nil),
        Number(n) => self.emit_constant(Value::Number(*n)),
        Boolean(b) => self.emit_op(if *b { Op::True } else { Op::False }),
        String(s) => {
            // Reuse the table entry when the string is already interned.
            let str_index = self.intern_string(s);
            self.emit_constant(Value::Interned(str_index));
        }
        Keyword(s) => {
            let kw_index = self.intern_keyword(s);
            self.emit_constant(Value::Keyword(kw_index));
        }
        Block(lines) => {
            self.scope_depth += 1;
            let (last_expr, init) = lines
                .split_last()
                .expect("internal Ludus compiler error: empty block");
            // Every line but the last is evaluated for effect only: bindings
            // keep their value, anything else is popped again.
            for expr in init {
                self.visit(expr);
                if !is_binding(expr) {
                    self.emit_op(Op::Pop);
                }
            }
            // The last line is the block's value. A binding's value is
            // duplicated before it is stored.
            self.visit(last_expr);
            if is_binding(last_expr) {
                self.emit_op(Op::Duplicate);
            }
            self.emit_op(Op::Store);
            self.close_scope();
            self.emit_op(Op::Pop);
            self.emit_op(Op::Load);
        }
        If(cond, then, r#else) => {
            self.visit(cond);
            let jif_idx = self.len();
            self.emit_op(Op::JumpIfFalse);
            self.emit_byte(0xff);
            self.visit(then);
            let jump_idx = self.len();
            self.emit_op(Op::Jump);
            self.emit_byte(0xff);
            self.visit(r#else);
            let end_idx = self.len();
            // NOTE(review): `jump_offset` is measured from the byte after the
            // Jump operand, whereas the `When` and `Match` arms appear to
            // measure their Jump distances from the Jump opcode itself --
            // confirm which convention the VM expects.
            let jif_offset = jump_idx - jif_idx;
            let jump_offset = end_idx - jump_idx - 2;
            self.patch_operand(jif_idx + 1, jif_offset);
            self.patch_operand(jump_idx + 1, jump_offset);
        }
        Let(patt, expr) => {
            self.emit_op(Op::ResetMatch);
            self.visit(expr);
            self.visit(patt);
            self.emit_op(Op::PanicIfNoMatch);
        }
        WordPattern(name) => {
            self.emit_op(Op::MatchWord);
            self.bind(name);
        }
        Word(name) => {
            self.emit_op(Op::PushBinding);
            // The innermost (most recently pushed) binding with this name wins.
            // `PushBinding` must always be followed by its operand, so an
            // unresolved name is reported instead of leaving the operand out.
            let Some(slot) = self.bindings.iter().rposition(|b| b.name == *name) else {
                panic!(
                    "internal Ludus compiler error: unbound name `{}`:{}:: {}",
                    name, self.span, self.ast
                )
            };
            self.emit_byte(slot);
        }
        PlaceholderPattern => {
            self.emit_op(Op::MatchWord);
            self.bind("");
        }
        NilPattern => {
            self.emit_op(Op::MatchNil);
            self.bind("");
        }
        BooleanPattern(b) => {
            self.emit_op(if *b { Op::MatchTrue } else { Op::MatchFalse });
            self.bind("");
        }
        NumberPattern(n) => {
            self.match_constant(Value::Number(*n));
        }
        KeywordPattern(s) => {
            let kw_index = self.intern_keyword(s);
            self.match_constant(Value::Keyword(kw_index));
        }
        StringPattern(s) => {
            let str_index = self.intern_string(s);
            self.match_constant(Value::Interned(str_index));
        }
        Tuple(members) => {
            for member in members {
                self.visit(member);
            }
            self.emit_op(Op::PushTuple);
            self.emit_byte(members.len());
        }
        List(members) => {
            for member in members {
                self.visit(member);
            }
            self.emit_op(Op::PushList);
            self.emit_byte(members.len());
        }
        LBox(name, expr) => {
            self.visit(expr);
            self.emit_op(Op::PushBox);
            self.bind(name);
        }
        Dict(pairs) => {
            for pair in pairs {
                self.visit(pair);
            }
            self.emit_op(Op::PushDict);
            self.emit_byte(pairs.len());
        }
        Pair(key, value) => {
            let kw_index = self.intern_keyword(key);
            self.emit_constant(Value::Keyword(kw_index));
            self.visit(value);
        }
        Synthetic(first, second, rest) => {
            match (&first.0, &second.0) {
                (Word(_), Keyword(_)) => {
                    self.visit(first);
                    self.visit(second);
                    self.emit_op(Op::GetKey);
                }
                (Keyword(_), Arguments(args)) => {
                    self.visit(&args[0]);
                    self.visit(first);
                    self.emit_op(Op::GetKey);
                }
                (Word(_), Arguments(_)) => {
                    todo!()
                }
                _ => unreachable!(),
            }
            // TODO: implement longer synthetic expressions
            for _term in rest {
                todo!()
            }
        }
        When(clauses) => {
            let mut jump_idxes = vec![];
            let mut clauses = clauses.iter();
            while let Some((WhenClause(cond, body), _)) = clauses.next() {
                self.visit(cond.as_ref());
                self.emit_op(Op::JumpIfFalse);
                let jif_jump_idx = self.len();
                self.emit_byte(0xff);
                self.visit(body);
                self.emit_op(Op::Jump);
                jump_idxes.push(self.len());
                self.emit_byte(0xff);
                // A false condition skips to the start of the next clause.
                // The distance is computed in `usize` so that chunks longer
                // than 255 bytes do not overflow the subtraction.
                let next_clause = self.len();
                self.patch_operand(jif_jump_idx, next_clause - jif_jump_idx - 1);
            }
            self.emit_op(Op::PanicNoWhen);
            let end = self.len();
            for idx in jump_idxes {
                self.patch_operand(idx, end - idx + 1);
            }
        }
        WhenClause(..) => unreachable!(),
        Match(scrutinee, clauses) => {
            self.visit(scrutinee.as_ref());
            let mut jump_idxes = vec![];
            let mut clauses = clauses.iter();
            while let Some((MatchClause(pattern, guard, body), _)) = clauses.next() {
                self.scope_depth += 1;
                self.visit(pattern);
                self.emit_op(Op::JumpIfNoMatch);
                let jnm_jump_idx = self.len();
                self.emit_byte(0xff);
                // A guard, when present, adds a conditional jump that shares
                // the clause's "try the next clause" target.
                let guard_jump_idx = match guard.as_ref() {
                    Some(expr) => {
                        self.visit(expr);
                        self.emit_op(Op::JumpIfFalse);
                        let jif_idx = self.len();
                        self.emit_byte(0xff);
                        Some(jif_idx)
                    }
                    None => None,
                };
                self.visit(body);
                self.emit_op(Op::Store);
                self.close_scope();
                self.emit_op(Op::Jump);
                jump_idxes.push(self.len());
                self.emit_byte(0xff);
                let next_clause = self.len();
                self.patch_operand(jnm_jump_idx, next_clause - jnm_jump_idx - 1);
                if let Some(jif_idx) = guard_jump_idx {
                    self.patch_operand(jif_idx, next_clause - jif_idx - 1);
                }
            }
            self.emit_op(Op::PanicNoMatch);
            self.emit_op(Op::Load);
            let end = self.len();
            for idx in jump_idxes {
                self.patch_operand(idx, end - idx);
            }
        }
        MatchClause(..) => unreachable!(),
        Fn(name, body, doc) => {
            // first, declare the function
            // TODO: or, check if the function has already been declared!
            let init_val = Value::Fn(Rc::new(OnceCell::new()));
            self.emit_constant(init_val);
            self.bind(name);
            // compile the function
            let mut compiler = Compiler::new(body, self.name, self.src);
            compiler.compile();
            if crate::DEBUG_COMPILE {
                println!("==function: {name}==");
                compiler.disassemble();
            }
            // Built but not yet installed anywhere; see the TODOs below.
            let _lfn = crate::value::LFn {
                name,
                doc: *doc,
                chunk: compiler.chunk,
                closed: vec![],
            };
            // TODO: close over everything accessed in the function
            // TODO: pull the function off the stack, and set the OnceCell.
        }
        FnDeclaration(name) => {
            let lfn = Value::Fn(Rc::new(OnceCell::new()));
            self.emit_constant(lfn);
            self.bind(name);
        }
        FnBody(_clauses) => {
            self.emit_op(Op::ResetMatch);
        }
        Repeat(times, body) => {
            self.visit(times);
            self.emit_op(Op::Truncate);
            // skip the decrement the first time
            self.emit_op(Op::Jump);
            self.emit_byte(1);
            // begin repeat
            self.emit_op(Op::Decrement);
            let repeat_begin = self.len();
            self.emit_op(Op::Duplicate);
            self.emit_op(Op::JumpIfZero);
            self.emit_byte(0xff);
            // compile the body
            self.visit(body);
            // pop whatever value the body returns
            self.emit_op(Op::Pop);
            self.emit_op(Op::JumpBack);
            // set jump points
            let repeat_end = self.len();
            self.emit_byte(repeat_end - repeat_begin);
            self.patch_operand(repeat_begin + 2, repeat_end - repeat_begin - 2);
            // pop the counter
            self.emit_op(Op::Pop);
            // and emit nil
            self.emit_constant(Value::Nil);
        }
        Loop(value, clauses) => {
            // algo:
            // first, put the values on the stack
            let (Ast::Tuple(members), _) = value.as_ref() else {
                unreachable!()
            };
            for member in members {
                self.visit(member);
            }
            let _arity = members.len();
            // then, save the beginning of the loop
            self.enter_loop();
            self.emit_op(Op::ResetMatch);
            // next, compile each clause:
            // NOTE(review): this arm is unfinished -- each clause bumps
            // `scope_depth` without closing the scope again, and the loop
            // start recorded above is never popped.
            let mut clauses = clauses.iter();
            while let Some((Ast::MatchClause(pattern, _, _body), _)) = clauses.next() {
                self.scope_depth += 1;
                let (Ast::TuplePattern(_members), _) = pattern.as_ref() else {
                    unreachable!()
                };
                // TODO: finish compiling match clauses
                // I just added "match depth" to the VM
                // this will set match depth to arity
                // and decrement it each pattern
                // the compiler will need to know about match depth for binding to work
                // we should match against ALL args first
                // rather than jump_no_matching after every arg check
                // compile the body
                // and then jump_no_match to the next clause
                // at the end, panic_no_match
            }
            // match against the values on the stack
            // we know the (fixed) arity, so we should know where to look
            // compile the clauses exactly as in `match`
        }
        Recur(_args) => {}
        Interpolated(..)
        | Arguments(..)
        | Placeholder
        | Panic(..)
        | Do(..)
        | Splat(..)
        | InterpolatedPattern(..)
        | AsPattern(..)
        | Splattern(..)
        | TuplePattern(..)
        | ListPattern(..)
        | PairPattern(..)
        | DictPattern(..) => todo!(),
    }
}

/// Returns the index of `s` in the chunk's string table, adding it if it is new.
fn intern_string(&mut self, s: &'static str) -> usize {
    let existing = self.chunk.strings.iter().position(|e| *e == s);
    match existing {
        Some(idx) => idx,
        None => {
            self.chunk.strings.push(s);
            self.chunk.strings.len() - 1
        }
    }
}

/// Returns the index of `kw` in the chunk's keyword table, adding it if it is new.
fn intern_keyword(&mut self, kw: &'static str) -> usize {
    let existing = self.chunk.keywords.iter().position(|e| *e == kw);
    match existing {
        Some(idx) => idx,
        None => {
            self.chunk.keywords.push(kw);
            self.chunk.keywords.len() - 1
        }
    }
}

/// Leaves the current scope: lowers `scope_depth` and emits one `Pop` for every
/// binding that belonged to the scope being closed, dropping it from the stack.
fn close_scope(&mut self) {
    self.scope_depth -= 1;
    while let Some(binding) = self.bindings.last() {
        if binding.depth <= self.scope_depth {
            break;
        }
        self.emit_op(Op::Pop);
        self.bindings.pop();
    }
}

/// Overwrites the placeholder operand at `operand_idx` with `value`, panicking
/// if `value` cannot be encoded in the one-byte operand.
fn patch_operand(&mut self, operand_idx: usize, value: usize) {
    if value > u8::MAX as usize {
        panic!(
            "internal Ludus compiler error: jump offset {} does not fit in one byte:{}:: {}",
            value, self.span, self.ast
        )
    }
    self.chunk.bytecode[operand_idx] = value as u8;
}
/// Prints a header followed by one line per instruction in this compiler's chunk.
///
/// # Panics
///
/// Panics if the bytecode contains an invalid opcode, ends in the middle of an
/// instruction, or references a constant outside the constant table.
pub fn disassemble(&self) {
    use Op::*;
    println!("=== chunk: {} ===", self.name);
    println!("IDX | CODE | INFO");
    let mut stream = self.chunk.bytecode.iter().enumerate();
    while let Some((i, raw)) = stream.next() {
        let op = Op::from_u8(*raw).unwrap();
        match op {
            // Operand is an index into the constant table: show the constant too.
            Constant | MatchConstant => {
                let (_, next) = stream.next().unwrap();
                let value = &self.chunk.constants[*next as usize].show(&self.chunk);
                println!("{i:04}: {:16} {next:04}: {value}", op.to_string());
            }
            // Plain one-byte operand, consumed here so it is not decoded as an opcode.
            PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump
            | JumpIfFalse | JumpIfNoMatch | JumpBack | JumpIfZero | MatchDepth => {
                let (_, next) = stream.next().unwrap();
                println!("{i:04}: {:16} {next:04}", op.to_string());
            }
            // No operand.
            Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse
            | MatchWord | ResetMatch | PanicIfNoMatch | GetKey | PanicNoWhen | PanicNoMatch
            | TypeOf | Duplicate | Truncate | Decrement => {
                println!("{i:04}: {op}")
            }
        }
    }
}
}