use crate::parser::Ast; use crate::pattern::Pattern; use crate::spans::Spanned; use crate::value::*; use chumsky::prelude::SimpleSpan; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::FromPrimitive; use std::cell::OnceCell; use std::rc::Rc; #[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)] pub enum Op { Noop, Nil, True, False, Constant, Jump, JumpIfFalse, Pop, PopN, PushBinding, Store, Load, ResetMatch, MatchNil, MatchTrue, MatchFalse, MatchWord, PanicIfNoMatch, MatchConstant, MatchTuple, PushTuple, LoadTuple, PushList, PushDict, PushBox, GetKey, PanicNoWhen, JumpIfNoMatch, JumpIfMatch, PanicNoMatch, TypeOf, JumpBack, JumpIfZero, Duplicate, Decrement, Truncate, MatchDepth, } impl std::fmt::Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use Op::*; let rep = match self { Noop => "noop", Nil => "nil", True => "true", False => "false", Constant => "constant", Jump => "jump", JumpIfFalse => "jump_if_false", Pop => "pop", PopN => "pop_n", PushBinding => "push_binding", Store => "store", Load => "load", MatchNil => "match_nil", MatchTrue => "match_true", MatchFalse => "match_false", MatchWord => "match_word", ResetMatch => "reset_match", PanicIfNoMatch => "panic_if_no_match", MatchConstant => "match_constant", MatchTuple => "match_tuple", PushTuple => "push_tuple", LoadTuple => "load_tuple", PushList => "push_list", PushDict => "push_dict", PushBox => "push_box", GetKey => "get_key", PanicNoWhen => "panic_no_when", JumpIfNoMatch => "jump_if_no_match", JumpIfMatch => "jump_if_match", PanicNoMatch => "panic_no_match", TypeOf => "type_of", JumpBack => "jump_back", JumpIfZero => "jump_if_zero", Decrement => "decrement", Truncate => "truncate", Duplicate => "duplicate", MatchDepth => "match_depth", }; write!(f, "{rep}") } } #[derive(Clone, Debug, PartialEq)] pub struct Binding { name: &'static str, depth: isize, stack_pos: usize, } #[derive(Clone, Debug, PartialEq)] pub struct Chunk { pub constants: Vec, pub bytecode: Vec, pub strings: Vec<&'static str>, pub keywords: Vec<&'static str>, } impl Chunk { pub fn dissasemble_instr(&self, i: usize) { let op = Op::from_u8(self.bytecode[i]).unwrap(); use Op::*; match op { Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse | PanicIfNoMatch | MatchWord | ResetMatch | GetKey | PanicNoWhen | PanicNoMatch | TypeOf | Duplicate | Decrement | Truncate | Noop | LoadTuple => { println!("{i:04}: {op}") } Constant | MatchConstant => { let next = self.bytecode[i + 1]; let value = &self.constants[next as usize].show(self); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump | JumpIfFalse | JumpIfNoMatch | JumpIfMatch | JumpBack | JumpIfZero | MatchDepth | PopN => { let next = self.bytecode[i + 1]; println!("{i:04}: {:16} {next:04}", op.to_string()); } } } pub fn kw_from(&self, kw: &str) -> Option { self.kw_index_from(kw).map(Value::Keyword) } pub fn kw_index_from(&self, kw: &str) -> Option { self.keywords.iter().position(|s| *s == kw) } } #[derive(Debug, Clone, PartialEq)] struct LoopInfo { start: usize, stack_root: usize, } impl LoopInfo { fn new(start: usize, stack_root: usize) -> LoopInfo { LoopInfo { start, stack_root } } } #[derive(Debug, Clone, PartialEq)] pub struct Compiler { pub chunk: Chunk, pub bindings: Vec, scope_depth: isize, match_depth: usize, stack_depth: usize, pub spans: Vec, pub nodes: Vec<&'static Ast>, pub ast: &'static Ast, pub span: SimpleSpan, pub src: &'static str, pub name: &'static str, loop_info: Vec, } fn is_binding(expr: &Spanned) -> bool { let (ast, _) = expr; use Ast::*; match ast { Let(..) | LBox(..) => true, Fn(name, ..) => !name.is_empty(), _ => false, } } impl Compiler { pub fn new(ast: &'static Spanned, name: &'static str, src: &'static str) -> Compiler { let chunk = Chunk { constants: vec![], bytecode: vec![], strings: vec![], keywords: vec![ "nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn", ], }; Compiler { chunk, bindings: vec![], scope_depth: -1, match_depth: 0, stack_depth: 0, spans: vec![], nodes: vec![], ast: &ast.0, span: ast.1, loop_info: vec![], src, name, } } pub fn kw_from(&self, kw: &str) -> Option { self.kw_index_from(kw).map(Value::Keyword) } pub fn kw_index_from(&self, kw: &str) -> Option { self.chunk.keywords.iter().position(|s| *s == kw) } pub fn visit(&mut self, node: &'static Spanned) { let root_node = self.ast; let root_span = self.span; let (ast, span) = node; self.ast = ast; self.span = *span; self.compile(); self.ast = root_node; self.span = root_span; } fn emit_constant(&mut self, val: Value) { let constant_index = self.chunk.constants.len(); if constant_index > u8::MAX as usize { panic!( "internal Ludus compiler error: too many constants in chunk:{}:: {}", self.span, self.ast ) } self.chunk.constants.push(val); self.chunk.bytecode.push(Op::Constant as u8); self.spans.push(self.span); self.chunk.bytecode.push(constant_index as u8); self.spans.push(self.span); self.stack_depth += 1; } fn match_constant(&mut self, val: Value) { let constant_index = match self.chunk.constants.iter().position(|v| *v == val) { Some(idx) => idx, None => self.chunk.constants.len(), }; if constant_index > u8::MAX as usize { panic!( "internal Ludus compiler error: too many constants in chunk:{}:: {}", self.span, self.ast ) } if constant_index == self.chunk.constants.len() { self.chunk.constants.push(val); } self.chunk.bytecode.push(Op::MatchConstant as u8); self.spans.push(self.span); self.chunk.bytecode.push(constant_index as u8); self.spans.push(self.span); // self.bind(""); } fn emit_op(&mut self, op: Op) { self.chunk.bytecode.push(op as u8); self.spans.push(self.span); } fn emit_byte(&mut self, byte: usize) { self.chunk.bytecode.push(byte as u8); self.spans.push(self.span); } fn len(&self) -> usize { self.chunk.bytecode.len() } fn bind(&mut self, name: &'static str) { let binding = Binding { name, depth: self.scope_depth, stack_pos: self.stack_depth - self.match_depth - 1, }; println!("{:?}", binding); println!("stack: {}; match: {}", self.stack_depth, self.match_depth); self.bindings.push(binding); } fn pop(&mut self) { self.emit_op(Op::Pop); self.stack_depth -= 1; } fn pop_n(&mut self, n: usize) { self.emit_op(Op::PopN); self.emit_byte(n); self.stack_depth -= n; } fn enter_loop(&mut self) { self.loop_info .push(LoopInfo::new(self.len(), self.bindings.len())); } fn leave_loop(&mut self) { self.loop_info.pop(); } fn loop_info(&mut self) -> LoopInfo { self.loop_info.last().unwrap().clone() } fn loop_idx(&mut self) -> usize { self.loop_info.last().unwrap().start } fn loop_root(&mut self) -> usize { self.loop_info.last().unwrap().stack_root } pub fn compile(&mut self) { use Ast::*; match self.ast { Error => unreachable!(), Nil => { self.emit_op(Op::Nil); self.stack_depth += 1; } Number(n) => self.emit_constant(Value::Number(*n)), Boolean(b) => { self.emit_op(if *b { Op::True } else { Op::False }); self.stack_depth += 1; } String(s) => { let existing_str = self.chunk.strings.iter().position(|e| e == s); let str_index = match existing_str { Some(idx) => idx, None => self.chunk.strings.len(), }; self.chunk.strings.push(s); self.emit_constant(Value::Interned(str_index)); } Keyword(s) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == s); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(s); } self.emit_constant(Value::Keyword(kw_index)); } Block(lines) => { self.scope_depth += 1; for expr in lines.iter().take(lines.len() - 1) { if is_binding(expr) { self.visit(expr); } else { self.visit(expr); self.pop(); // self.emit_op(Op::Pop); } } let last_expr = lines.last().unwrap(); if is_binding(last_expr) { self.visit(last_expr); self.emit_op(Op::Duplicate); self.stack_depth += 1; } else { self.visit(last_expr); self.stack_depth += 1; } self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.pop(); // self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.pop(); // self.emit_op(Op::Pop); self.emit_op(Op::Load); } If(cond, then, r#else) => { self.visit(cond); let jif_idx = self.len(); self.emit_op(Op::JumpIfFalse); self.emit_byte(0xff); self.stack_depth -= 1; self.visit(then); let jump_idx = self.len(); self.emit_op(Op::Jump); self.emit_byte(0xff); self.visit(r#else); self.stack_depth -= 1; let end_idx = self.len(); let jif_offset = jump_idx - jif_idx; let jump_offset = end_idx - jump_idx - 2; self.chunk.bytecode[jif_idx + 1] = jif_offset as u8; self.chunk.bytecode[jump_idx + 1] = jump_offset as u8; } Let(patt, expr) => { self.match_depth = 0; self.emit_op(Op::ResetMatch); self.visit(expr); self.visit(patt); self.emit_op(Op::PanicIfNoMatch); } WordPattern(name) => { self.emit_op(Op::MatchWord); self.bind(name); } Word(name) => { self.emit_op(Op::PushBinding); self.stack_depth += 1; let biter = self.bindings.iter().rev(); for binding in biter { if binding.name == *name { self.emit_byte(binding.stack_pos); break; } } } PlaceholderPattern => { self.emit_op(Op::MatchWord); // self.bind(""); } NilPattern => { self.emit_op(Op::MatchNil); // self.bind(""); } BooleanPattern(b) => { if *b { self.emit_op(Op::MatchTrue); // self.bind(""); } else { self.emit_op(Op::MatchFalse); // self.bind(""); } } NumberPattern(n) => { self.match_constant(Value::Number(*n)); } KeywordPattern(s) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == s); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(s); } self.match_constant(Value::Keyword(kw_index)); } StringPattern(s) => { let existing_str = self.chunk.strings.iter().position(|e| e == s); let str_index = match existing_str { Some(idx) => idx, None => self.chunk.strings.len(), }; if str_index == self.chunk.strings.len() { self.chunk.strings.push(s) } self.match_constant(Value::Interned(str_index)); } // TODO: finish this work // What's going on: // Currently, bindings are made in lockstep with the stack. // And the index of the binding in the bindings array in the compiler gets converted to a u8 as the index into the stack // I suspect this will have to change when we get stack frames // But what's happening here is that nested tuple bindings are out of order // When a tuple gets unfolded at the end of the stack, the binding that matches is now not where you'd expect // The whole "match depth" construct, while working, is what allows for out-of-order matching/binding // So either: // - Bindings need to work differently, where there's some way of representing them that's not just the index of where the name is // - Or we need a way of working with nested tuples that ensure bindings continue to be orederly // My sense is that the former is the correct approach. // It introduces some complexity: a binding will have to be a `(name, offset)` tuple or some such // Working with nested tuples could perhaps be solved by representing tuples on the stack, but then you're going to run into similar problems with list and dict patterns // And so, the thing to, I think, is to get clever with an offset // But to do that, probably the compiler needs to model the stack? Ugh. // SO, THEN: // [x] 1. we need a stack counter, that increases any time anything gets pushed to the stack, and decreases any time anything gets popped // [x] 2. We need to change the representation of bindings to be a tuple (name, index), where index is `stack size - match depth` // [x] 3. This means we get to remove the "silent" bindings where all patterns add a `""` binding // [x] 4. Question: given that we need both of these things, should I model this as methods rather than explicit `emit_op` calls? Probably. // Currently: there's still an off by one error with the current test code with nested tuples, but I can prolly fix this? TuplePattern(members) => { self.emit_op(Op::MatchTuple); self.emit_byte(members.len()); self.emit_op(Op::JumpIfNoMatch); let before_load_tup_idx = self.len(); self.emit_byte(0xff); let mut jump_idxes = vec![]; self.match_depth += members.len(); self.emit_op(Op::LoadTuple); self.stack_depth += members.len(); for member in members { self.match_depth -= 1; self.emit_op(Op::MatchDepth); self.emit_byte(self.match_depth); self.visit(member); self.emit_op(Op::JumpIfNoMatch); jump_idxes.push(self.len()); self.emit_byte(0xff); } self.emit_op(Op::Jump); let jump_idx = self.len(); self.emit_byte(0xff); for idx in jump_idxes { self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; } for _ in 0..members.len() { // self.pop(); // this only runs if there's no match // so don't change the representation of the stack // contingencies will be handled by the binding forms // thus: emit Op::Pop directly self.emit_op(Op::Pop); } self.chunk.bytecode[before_load_tup_idx] = self.len() as u8 - before_load_tup_idx as u8 - 1; self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; } Tuple(members) => { for member in members { self.visit(member); } self.emit_op(Op::PushTuple); self.emit_byte(members.len()); self.stack_depth = self.stack_depth + 1 - members.len(); } List(members) => { for member in members { self.visit(member); } self.emit_op(Op::PushList); self.emit_byte(members.len()); self.stack_depth = self.stack_depth + 1 - members.len(); } LBox(name, expr) => { self.visit(expr); self.emit_op(Op::PushBox); self.stack_depth += 1; self.bind(name); } Dict(pairs) => { for pair in pairs { self.visit(pair); } self.emit_op(Op::PushDict); self.emit_byte(pairs.len()); self.stack_depth = self.stack_depth + 1 - pairs.len(); } Pair(key, value) => { let existing_kw = self.chunk.keywords.iter().position(|kw| kw == key); let kw_index = match existing_kw { Some(index) => index, None => self.chunk.keywords.len(), }; if kw_index == self.chunk.keywords.len() { self.chunk.keywords.push(key); } self.emit_constant(Value::Keyword(kw_index)); self.visit(value); } Synthetic(first, second, rest) => { match (&first.0, &second.0) { (Word(_), Keyword(_)) => { self.visit(first); self.visit(second); self.emit_op(Op::GetKey); } (Keyword(_), Arguments(args)) => { self.visit(&args[0]); self.visit(first); self.emit_op(Op::GetKey); } (Word(_), Arguments(_)) => { todo!() } _ => unreachable!(), } // TODO: implement longer synthetic expressions for term in rest { todo!() } } // TODO: Keep track of the stack in // WHEN and MATCH: // each needs to just hold onto the stack depth // before each clause, and reset it after each When(clauses) => { let mut jump_idxes = vec![]; let mut clauses = clauses.iter(); let stack_depth = self.stack_depth; while let Some((WhenClause(cond, body), _)) = clauses.next() { self.visit(cond.as_ref()); self.emit_op(Op::JumpIfFalse); let jif_jump_idx = self.len(); self.emit_byte(0xff); self.visit(body); self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jif_jump_idx] = self.len() as u8 - jif_jump_idx as u8 - 1; self.stack_depth = stack_depth; } self.stack_depth += 1; self.emit_op(Op::PanicNoWhen); for idx in jump_idxes { self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 + 1; } } WhenClause(..) => unreachable!(), Match(scrutinee, clauses) => { self.visit(scrutinee.as_ref()); let stack_depth = self.stack_depth; let mut jump_idxes = vec![]; let mut clauses = clauses.iter(); while let Some((MatchClause(pattern, guard, body), _)) = clauses.next() { self.scope_depth += 1; self.match_depth = 0; self.visit(pattern); self.emit_op(Op::JumpIfNoMatch); let jnm_jump_idx = self.len(); self.emit_byte(0xff); // conditional compilation of guards // hard to DRY out match guard.as_ref() { Some(expr) => { self.visit(expr); self.emit_op(Op::JumpIfFalse); let jif_idx = self.len(); self.emit_byte(0xff); self.visit(body); self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.pop(); // self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jnm_jump_idx] = self.len() as u8 - jnm_jump_idx as u8 - 1; self.chunk.bytecode[jif_idx] = self.len() as u8 - jif_idx as u8 - 1; } None => { self.visit(body); self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { if binding.depth > self.scope_depth { self.pop(); // self.emit_op(Op::Pop); self.bindings.pop(); } else { break; } } self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); self.chunk.bytecode[jnm_jump_idx] = self.len() as u8 - jnm_jump_idx as u8 - 1; } } self.stack_depth = stack_depth; } self.emit_op(Op::PanicNoMatch); for idx in jump_idxes { self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; } self.pop(); // self.emit_op(Op::Pop); self.emit_op(Op::Load); self.stack_depth += 1; } MatchClause(..) => unreachable!(), Fn(name, body, doc) => { // first, declare the function // TODO: or, check if the function has already been declared! let init_val = Value::Fn(Rc::new(OnceCell::new())); self.emit_constant(init_val); self.bind(name); // compile the function let mut compiler = Compiler::new(body, self.name, self.src); compiler.compile(); if crate::DEBUG_COMPILE { println!("==function: {name}=="); compiler.disassemble(); } let lfn = crate::value::LFn { name, doc: *doc, chunk: compiler.chunk, closed: vec![], }; // TODO: close over everything accessed in the function // TODO: pull the function off the stack, and set the OnceCell. } FnDeclaration(name) => { let lfn = Value::Fn(Rc::new(OnceCell::new())); self.emit_constant(lfn); self.bind(name); } FnBody(clauses) => { self.emit_op(Op::ResetMatch); todo!(); } // TODO: add stack-tracking to this Repeat(times, body) => { self.visit(times); self.emit_op(Op::Truncate); // skip the decrement the first time self.emit_op(Op::Jump); self.emit_byte(1); // begin repeat self.emit_op(Op::Decrement); let repeat_begin = self.len(); self.emit_op(Op::Duplicate); self.stack_depth += 1; self.emit_op(Op::JumpIfZero); self.emit_byte(0xff); // compile the body self.visit(body); // pop whatever value the body returns self.pop(); // self.emit_op(Op::Pop); self.emit_op(Op::JumpBack); // set jump points let repeat_end = self.len(); self.emit_byte(repeat_end - repeat_begin); self.chunk.bytecode[repeat_begin + 2] = (repeat_end - repeat_begin - 2) as u8; // pop the counter self.pop(); // self.emit_op(Op::Pop); // and emit nil self.emit_constant(Value::Nil); } Loop(value, clauses) => { todo!(); //algo: //first, put the values on the stack let (Ast::Tuple(members), _) = value.as_ref() else { unreachable!() }; for member in members { self.visit(member); } let arity = members.len(); //then, save the beginning of the loop self.enter_loop(); self.emit_op(Op::ResetMatch); //next, compile each clause: let mut clauses = clauses.iter(); let mut jump_idxes = vec![]; while let Some((Ast::MatchClause(pattern, _, body), _)) = clauses.next() { self.scope_depth += 1; let (Ast::TuplePattern(members), _) = pattern.as_ref() else { unreachable!() }; let mut match_depth = arity; let mut members = members.iter(); while match_depth > 0 { self.emit_op(Op::MatchDepth); self.emit_byte(match_depth - 1); self.visit(members.next().unwrap()); match_depth -= 1; } self.emit_op(Op::JumpIfNoMatch); let jnm_idx = self.len(); self.emit_byte(0xff); self.visit(body); self.emit_op(Op::Jump); self.emit_byte(0xff); jump_idxes.push(self.len()); self.chunk.bytecode[jnm_idx] = self.len() as u8 - jnm_idx as u8; self.scope_depth -= 1; } self.emit_op(Op::PanicNoMatch); // TODO: fix up jump indexes a la match self.leave_loop(); } Recur(_) => { // algo // visit each member of the arguments // then store those in the return register // then pop back to loop stack root // then jump to loop start } Interpolated(..) | Arguments(..) | Placeholder | Panic(..) | Do(..) | Splat(..) | InterpolatedPattern(..) | AsPattern(..) | Splattern(..) | ListPattern(..) | PairPattern(..) | DictPattern(..) => todo!(), } } pub fn disassemble(&self) { println!("=== chunk: {} ===", self.name); println!("IDX | CODE | INFO"); let mut codes = self.chunk.bytecode.iter().enumerate(); while let Some((i, byte)) = codes.next() { let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { Noop | Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse | MatchWord | ResetMatch | PanicIfNoMatch | GetKey | PanicNoWhen | PanicNoMatch | TypeOf | Duplicate | Truncate | Decrement | LoadTuple => { println!("{i:04}: {op}") } Constant | MatchConstant => { let (_, next) = codes.next().unwrap(); let value = &self.chunk.constants[*next as usize].show(&self.chunk); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox | Jump | JumpIfFalse | JumpIfNoMatch | JumpIfMatch | JumpBack | JumpIfZero | MatchDepth | PopN => { let (_, next) = codes.next().unwrap(); println!("{i:04}: {:16} {next:04}", op.to_string()); } } } } }