use crate::parser::Ast; use crate::spans::Spanned; use crate::value::*; use chumsky::prelude::SimpleSpan; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::FromPrimitive; use std::cell::OnceCell; use std::rc::Rc; #[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)] pub enum Op { Nil, True, False, Constant, Jump, JumpIfFalse, Pop, PushBinding, Store, Load, ResetMatch, MatchNil, MatchTrue, MatchFalse, MatchWord, PanicIfNoMatch, MatchConstant, MatchTuple, PushTuple, PushList, PushDict, PushBox, GetKey, PanicNoWhen, JumpIfNoMatch, PanicNoMatch, TypeOf, JumpBack, JumpIfZero, Duplicate, Decrement, Truncate, MatchDepth, } impl std::fmt::Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use Op::*; let rep = match self { Nil => "nil", True => "true", False => "false", Constant => "constant", Jump => "jump", JumpIfFalse => "jump_if_false", Pop => "pop", PushBinding => "push_binding", Store => "store", Load => "load", MatchNil => "match_nil", MatchTrue => "match_true", MatchFalse => "match_false", MatchWord => "match_word", ResetMatch => "reset_match", PanicIfNoMatch => "panic_if_no_match", MatchConstant => "match_constant", MatchTuple => "match_tuple", PushTuple => "push_tuple", PushList => "push_list", PushDict => "push_dict", PushBox => "push_box", GetKey => "get_key", PanicNoWhen => "panic_no_when", JumpIfNoMatch => "jump_if_no_match", PanicNoMatch => "panic_no_match", TypeOf => "type_of", JumpBack => "jump_back", JumpIfZero => "jump_if_zero", Decrement => "decrement", Truncate => "truncate", Duplicate => "duplicate", MatchDepth => "match_depth", }; write!(f, "{rep}") } } #[derive(Clone, Debug, PartialEq)] pub struct Binding { name: &'static str, depth: isize, } #[derive(Clone, Debug, PartialEq)] pub struct Chunk { pub constants: Vec, pub bytecode: Vec, pub strings: Vec<&'static str>, pub keywords: Vec<&'static str>, } impl Chunk { pub fn dissasemble_instr(&self, i: usize) { let op = Op::from_u8(self.bytecode[i]).unwrap(); use Op::*; match op { Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse | PanicIfNoMatch | MatchWord | ResetMatch | GetKey | PanicNoWhen | PanicNoMatch | TypeOf | Duplicate | Decrement | Truncate => { println!("{i:04}: {op}") } Constant | MatchConstant => { let next = self.bytecode[i + 1]; let value = &self.constants[next as usize].show(self); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump | JumpIfFalse | JumpIfNoMatch | JumpBack | JumpIfZero | MatchDepth => { let next = self.bytecode[i + 1]; println!("{i:04}: {:16} {next:04}", op.to_string()); } } } pub fn kw_from(&self, kw: &str) -> Option { self.kw_index_from(kw).map(Value::Keyword) } pub fn kw_index_from(&self, kw: &str) -> Option { self.keywords.iter().position(|s| *s == kw) } } pub struct Compiler { pub chunk: Chunk, pub bindings: Vec, scope_depth: isize, num_bindings: usize, pub spans: Vec, pub nodes: Vec<&'static Ast>, pub ast: &'static Ast, pub span: SimpleSpan, pub src: &'static str, pub name: &'static str, loop_idxes: Vec, } fn is_binding(expr: &Spanned) -> bool { let (ast, _) = expr; use Ast::*; match ast { Let(..) | LBox(..) => true, Fn(name, ..) => !name.is_empty(), _ => false, } } impl Compiler { pub fn new(ast: &'static Spanned, name: &'static str, src: &'static str) -> Compiler { let chunk = Chunk { constants: vec![], bytecode: vec![], strings: vec![], keywords: vec![ "nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn", ], }; Compiler { chunk, bindings: vec![], scope_depth: -1, num_bindings: 0, spans: vec![], nodes: vec![], ast: &ast.0, span: ast.1, loop_idxes: vec![], src, name, } } pub fn kw_from(&self, kw: &str) -> Option { self.kw_index_from(kw).map(Value::Keyword) } pub fn kw_index_from(&self, kw: &str) -> Option { self.chunk.keywords.iter().position(|s| *s == kw) } pub fn visit(&mut self, node: &'static Spanned) { let root_node = self.ast; let root_span = self.span; let (ast, span) = node; self.ast = ast; self.span = *span; self.compile(); self.ast = root_node; self.span = root_span; } fn emit_constant(&mut self, val: Value) { let constant_index = self.chunk.constants.len(); if constant_index > u8::MAX as usize { panic!( "internal Ludus compiler error: too many constants in chunk:{}:: {}", self.span, self.ast ) } self.chunk.constants.push(val); self.chunk.bytecode.push(Op::Constant as u8); self.spans.push(self.span); self.chunk.bytecode.push(constant_index as u8); self.spans.push(self.span); } fn match_constant(&mut self, val: Value) { let constant_index = match self.chunk.constants.iter().position(|v| *v == val) { Some(idx) => idx, None => self.chunk.constants.len(), }; if constant_index > u8::MAX as usize { panic!( "internal Ludus compiler error: too many constants in chunk:{}:: {}", self.span, self.ast ) } if constant_index == self.chunk.constants.len() { self.chunk.constants.push(val); } self.chunk.bytecode.push(Op::MatchConstant as u8); self.spans.push(self.span); self.chunk.bytecode.push(constant_index as u8); self.spans.push(self.span); self.bind(""); } fn emit_op(&mut self, op: Op) { self.chunk.bytecode.push(op as u8); self.spans.push(self.span); } fn emit_byte(&mut self, byte: usize) { self.chunk.bytecode.push(byte as u8); self.spans.push(self.span); } fn len(&self) -> usize { self.chunk.bytecode.len() } fn bind(&mut self, name: &'static str) { self.bindings.push(Binding { name, depth: self.scope_depth, }); } fn enter_loop(&mut self) { self.loop_idxes.push(self.len()); } fn leave_loop(&mut self) { self.loop_idxes.pop(); } fn loop_idx(&mut self) -> usize { *self.loop_idxes.last().unwrap() } pub fn compile(&mut self) { use Ast::*; match self.ast { Error => unreachable!(), Nil => self.emit_op(Op::Nil), Number(n) => self.emit_constant(Value::Number(*n)), Boolean(b) => self.emit_op(if *b { Op::True } else { Op::False }), String(s) => { let existing_str = self.chunk.strings.iter().position(|e| e == s); let str_index = match existing_str { Some(idx) => idx, None => self.chunk.strings.len(), }; self.chunk.strings.push(s); self.emit_constant(Value::Interned(str_index)); } Keyword(s) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == s); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(s); } self.emit_constant(Value::Keyword(kw_index)); } Block(lines) => { self.scope_depth += 1; for expr in lines.iter().take(lines.len() - 1) { if is_binding(expr) { self.visit(expr); } else { self.visit(expr); self.emit_op(Op::Pop); } } let last_expr = lines.last().unwrap(); if is_binding(last_expr) { self.visit(last_expr); self.emit_op(Op::Duplicate); } else { self.visit(last_expr); } self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.emit_op(Op::Pop); self.emit_op(Op::Load); } If(cond, then, r#else) => { self.visit(cond); let jif_idx = self.len(); self.emit_op(Op::JumpIfFalse); self.emit_byte(0xff); self.visit(then); let jump_idx = self.len(); self.emit_op(Op::Jump); self.emit_byte(0xff); self.visit(r#else); let end_idx = self.len(); let jif_offset = jump_idx - jif_idx; let jump_offset = end_idx - jump_idx - 2; self.chunk.bytecode[jif_idx + 1] = jif_offset as u8; self.chunk.bytecode[jump_idx + 1] = jump_offset as u8; } Let(patt, expr) => { self.emit_op(Op::ResetMatch); self.visit(expr); self.visit(patt); self.emit_op(Op::PanicIfNoMatch); } WordPattern(name) => { self.emit_op(Op::MatchWord); self.bind(name); } Word(name) => { self.emit_op(Op::PushBinding); let biter = self.bindings.iter().enumerate().rev(); for (i, binding) in biter { if binding.name == *name { self.emit_byte(i); break; } } } PlaceholderPattern => { self.emit_op(Op::MatchWord); self.bind(""); } NilPattern => { self.emit_op(Op::MatchNil); self.bind(""); } BooleanPattern(b) => { if *b { self.emit_op(Op::MatchTrue); self.bind(""); } else { self.emit_op(Op::MatchFalse); self.bind(""); } } NumberPattern(n) => { self.match_constant(Value::Number(*n)); } KeywordPattern(s) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == s); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(s); } self.match_constant(Value::Keyword(kw_index)); } StringPattern(s) => { let existing_str = self.chunk.strings.iter().position(|e| e == s); let str_index = match existing_str { Some(idx) => idx, None => self.chunk.strings.len(), }; if str_index == self.chunk.strings.len() { self.chunk.strings.push(s) } self.match_constant(Value::Interned(str_index)); } Tuple(members) => { for member in members { self.visit(member); } self.emit_op(Op::PushTuple); self.emit_byte(members.len()); } List(members) => { for member in members { self.visit(member); } self.emit_op(Op::PushList); self.emit_byte(members.len()); } LBox(name, expr) => { self.visit(expr); self.emit_op(Op::PushBox); self.bind(name); } Dict(pairs) => { for pair in pairs { self.visit(pair); } self.emit_op(Op::PushDict); self.emit_byte(pairs.len()); } Pair(key, value) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == key); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(key); } self.emit_constant(Value::Keyword(kw_index)); self.visit(value); } Synthetic(first, second, rest) => { match (&first.0, &second.0) { (Word(_), Keyword(_)) => { self.visit(first); self.visit(second); self.emit_op(Op::GetKey); } (Keyword(_), Arguments(args)) => { self.visit(&args[0]); self.visit(first); self.emit_op(Op::GetKey); } (Word(_), Arguments(_)) => { todo!() } _ => unreachable!(), } // TODO: implement longer synthetic expressions for term in rest { todo!() } } When(clauses) => { let mut jump_idxes = vec![]; let mut clauses = clauses.iter(); while let Some((WhenClause(cond, body), _)) = clauses.next() { self.visit(cond.as_ref()); self.emit_op(Op::JumpIfFalse); let jif_jump_idx = self.len(); self.emit_byte(0xff); self.visit(body); self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jif_jump_idx] = self.len() as u8 - jif_jump_idx as u8 - 1; } self.emit_op(Op::PanicNoWhen); for idx in jump_idxes { self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 + 1; } } WhenClause(..) => unreachable!(), Match(scrutinee, clauses) => { self.visit(scrutinee.as_ref()); let mut jump_idxes = vec![]; let mut clauses = clauses.iter(); while let Some((MatchClause(pattern, guard, body), _)) = clauses.next() { self.scope_depth += 1; self.visit(pattern); self.emit_op(Op::JumpIfNoMatch); let jnm_jump_idx = self.len(); self.emit_byte(0xff); // conditional compilation of guards // hard to DRY out match guard.as_ref() { Some(expr) => { self.visit(expr); self.emit_op(Op::JumpIfFalse); let jif_idx = self.len(); self.emit_byte(0xff); self.visit(body); self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jnm_jump_idx] = self.len() as u8 - jnm_jump_idx as u8 - 1; self.chunk.bytecode[jif_idx] = self.len() as u8 - jif_idx as u8 - 1; } None => { self.visit(body); self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jnm_jump_idx] = self.len() as u8 - jnm_jump_idx as u8 - 1; } } } self.emit_op(Op::PanicNoMatch); self.emit_op(Op::Load); for idx in jump_idxes { self.chunk.bytecode[idx] = self.len() as u8 - idx as u8; } } MatchClause(..) => unreachable!(), Fn(name, body, doc) => { // first, declare the function // TODO: or, check if the function has already been declared! let init_val = Value::Fn(Rc::new(OnceCell::new())); self.emit_constant(init_val); self.bind(name); // compile the function let mut compiler = Compiler::new(body, self.name, self.src); compiler.compile(); if crate::DEBUG_COMPILE { println!("==function: {name}=="); compiler.disassemble(); } let lfn = crate::value::LFn { name, doc: *doc, chunk: compiler.chunk, closed: vec![], }; // TODO: close over everything accessed in the function // TODO: pull the function off the stack, and set the OnceCell. } FnDeclaration(name) => { let lfn = Value::Fn(Rc::new(OnceCell::new())); self.emit_constant(lfn); self.bind(name); } FnBody(clauses) => { self.emit_op(Op::ResetMatch); } Repeat(times, body) => { self.visit(times); self.emit_op(Op::Truncate); // skip the decrement the first time self.emit_op(Op::Jump); self.emit_byte(1); // begin repeat self.emit_op(Op::Decrement); let repeat_begin = self.len(); self.emit_op(Op::Duplicate); self.emit_op(Op::JumpIfZero); self.emit_byte(0xff); // compile the body self.visit(body); // pop whatever value the body returns self.emit_op(Op::Pop); self.emit_op(Op::JumpBack); // set jump points let repeat_end = self.len(); self.emit_byte(repeat_end - repeat_begin); self.chunk.bytecode[repeat_begin + 2] = (repeat_end - repeat_begin - 2) as u8; // pop the counter self.emit_op(Op::Pop); // and emit nil self.emit_constant(Value::Nil); } Loop(value, clauses) => { //algo: //first, put the values on the stack let (Ast::Tuple(members), _) = value.as_ref() else { unreachable!() }; for member in members { self.visit(member); } let arity = members.len(); //then, save the beginning of the loop self.enter_loop(); self.emit_op(Op::ResetMatch); //next, compile each clause: let mut clauses = clauses.iter(); while let Some((Ast::MatchClause(pattern, _, body), _)) = clauses.next() { self.scope_depth += 1; let (Ast::TuplePattern(members), _) = pattern.as_ref() else { unreachable!() }; } //match against the values on the stack //we know the (fixed) arity, so we should know where to look //compile the clauses exactly as in `match` } Recur(args) => {} Interpolated(..) | Arguments(..) | Placeholder | Panic(..) | Do(..) | Splat(..) | InterpolatedPattern(..) | AsPattern(..) | Splattern(..) | TuplePattern(..) | ListPattern(..) | PairPattern(..) | DictPattern(..) => todo!(), } } pub fn disassemble(&self) { println!("=== chunk: {} ===", self.name); println!("IDX | CODE | INFO"); let mut codes = self.chunk.bytecode.iter().enumerate(); while let Some((i, byte)) = codes.next() { let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse | MatchWord | ResetMatch | PanicIfNoMatch | GetKey | PanicNoWhen | PanicNoMatch | TypeOf | Duplicate | Truncate | Decrement => { println!("{i:04}: {op}") } Constant | MatchConstant => { let (_, next) = codes.next().unwrap(); let value = &self.chunk.constants[*next as usize].show(&self.chunk); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump | JumpIfFalse | JumpIfNoMatch | JumpBack | JumpIfZero | MatchDepth => { let (_, next) = codes.next().unwrap(); println!("{i:04}: {:16} {next:04}", op.to_string()); } } } } }