From d4342b0623476374925d0565e6c7f34538b37c80 Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Wed, 18 Dec 2024 01:28:23 -0500 Subject: [PATCH] get binding & pretty debugging working --- bytecode_thoughts.md | 48 ++++++++++++++++++++++++++++++++++++++++++- src/compiler.rs | 49 +++++++++++++++++++++++++++++++++++--------- src/main.rs | 22 ++++++++++++-------- src/parser.rs | 2 +- src/value.rs | 15 ++++++++++++++ src/vm.rs | 32 +++++++++++++++++++++++++---- 6 files changed, 143 insertions(+), 25 deletions(-) diff --git a/bytecode_thoughts.md b/bytecode_thoughts.md index 1185b9d..89ef94c 100644 --- a/bytecode_thoughts.md +++ b/bytecode_thoughts.md @@ -84,4 +84,50 @@ let number = ((first as u16) << 8) | second as u16; #### Oy, stacks and expressions One thing that's giving me grief is when to pop and when to note on the value stack. -Consider +So, like, we need to make sure that a line of code leaves the stack exactly where it was before it ran, with the exception of binding forms: `let`, `fn`, `box`, etc. Those leave one (or more!) items on the stack. + +In the simplest case, we have a line of code that's just a constant: + +``` +false +``` +This should emit the bytecode instructions (more or less): +``` +push false +pop +``` +The push comes from the `false` value. +The pop comes from the end of a (nonbinding) line. + +The problem is that there's no way (at all, in Ludus) to distinguish between an expression that's just a constant and a line that is a complete line of code that's an expression. + +So if we have the following: +``` +let foo = false +``` +We want: +``` +push false +``` +Or, rather, given that `foo` is a word pattern, what we actually want is: +``` +push false # constant +push pattern/word # load pattern +pop +pop # compare +push false # for the binding +``` + +But it's worth it here to explore Ludus's semantics. +It's the case that there are actually only three binding forms (for now): `let`, `fn`, and `box`. 
+Figuring out `let` will help a great deal. +Match also binds things, but at the very least, match doesn't bind against an expression on the rhs, only against a single value. + +Think, too, about expressions: everything comes down to a single value (of course), even tuples (especially now that I'm separating function calls from tuple values (probably)). +So: anything that *isn't* a binding form should, before the `pop` from the end of a line, only leave a single value on the stack. +Which suggests that, as odd as it is, pushing a single `nil` onto the stack, just to pop it, might make sense. +Or, perhaps the thing to do is to peek: check whether the line in question is a binding form or not, then emit different bytecode accordingly. +That's probably the thing to do. Jesus, Scott. + +And **another** thing worth internalizing: every single instruction that's not an explicit push or pop should leave the stack length unchanged. +So store and load always need to swap in a `nil`. diff --git a/src/compiler.rs b/src/compiler.rs index 4639dfd..b3f5df3 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -54,6 +54,16 @@ pub struct Chunk<'a> { pub name: &'static str, } +fn is_binding(expr: &Spanned) -> bool { + let (ast, _) = expr; + use Ast::*; + match ast { + Let(..) | LBox(..) => true, + Fn(name, ..) 
=> *name != "*anon", + _ => false, + } +} + impl<'a> Chunk<'a> { pub fn new(ast: &'a Spanned, name: &'static str, src: &'static str) -> Chunk<'a> { Chunk { @@ -114,12 +124,10 @@ impl<'a> Chunk<'a> { } fn bind(&mut self, name: &'static str) { - println!("binding {name} at depth {}", self.scope_depth); self.bindings.push(Binding { name, depth: self.scope_depth, }); - println!("{:?}", self.bindings) } pub fn compile(&mut self) { @@ -144,11 +152,15 @@ impl<'a> Chunk<'a> { } Block(lines) => { self.scope_depth += 1; - println!("now entering scope level {}", self.scope_depth); - for expr in lines { - self.visit(expr); - self.emit_op(Op::Pop); + for expr in lines.iter().take(lines.len() - 1) { + if is_binding(expr) { + self.visit(expr) + } else { + self.visit(expr); + self.emit_op(Op::Pop); + } } + self.visit(lines.last().unwrap()); self.emit_op(Op::Store); self.scope_depth -= 1; while let Some(binding) = self.bindings.last() { @@ -178,7 +190,6 @@ impl<'a> Chunk<'a> { self.bytecode[jump_idx + 1] = jump_offset as u8; } Let(patt, expr) => { - println!("let binding!"); self.visit(expr); self.visit(patt); } @@ -186,12 +197,9 @@ impl<'a> Chunk<'a> { self.bind(name); } Word(name) => { - println!("resolving binding {name}"); - println!("current bindings {:?}", self.bindings); self.emit_op(Op::PushBinding); let biter = self.bindings.iter().enumerate().rev(); for (i, binding) in biter { - println!("at index {i}"); if binding.name == *name { self.bytecode.push(i as u8); break; @@ -230,4 +238,25 @@ impl<'a> Chunk<'a> { } } } + + pub fn dissasemble_instr(&self, i: usize) { + let op = Op::from_u8(self.bytecode[i]).unwrap(); + use Op::*; + match op { + Pop | Store | Load => println!("{i:04}: {op}"), + Constant => { + let next = self.bytecode[i + 1]; + let value = &self.constants[next as usize].show(self); + println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); + } + PushBinding => { + let next = self.bytecode[i + 1]; + println!("{i:04}: {:16} {next:04}", op.to_string()); + } + 
Jump | JumpIfFalse => { + let next = self.bytecode[i + 1]; + println!("{i:04}: {:16} {next:04}", op.to_string()) + } + } + } } diff --git a/src/main.rs b/src/main.rs index cabcea9..9fda65d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,8 @@ use chumsky::{input::Stream, prelude::*}; +const DEBUG_COMPILE: bool = true; +const DEBUG_RUN: bool = true; + mod memory_sandbox; mod spans; @@ -41,7 +44,14 @@ pub fn run(src: &'static str) { let mut chunk = Chunk::new(&parsed, "test", src); chunk.compile(); - chunk.disassemble(); + if DEBUG_COMPILE { + chunk.disassemble(); + println!("\n\n") + } + + if DEBUG_RUN { + println!("=== vm run: test ==="); + } let mut vm = Vm::new(&chunk); let result = vm.interpret(); @@ -58,15 +68,9 @@ let foo = :let_foo let bar = if true then { - :foo - :bar - :baz - } - else { - 1 - 2 - 3 + let baz = :baz } + else :whatever foo diff --git a/src/parser.rs b/src/parser.rs index 80eecfa..0556d42 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -922,7 +922,7 @@ where let lambda = just(Token::Reserved("fn")) .ignore_then(fn_unguarded.clone()) - .map_with(|clause, e| (Fn("anonymous", vec![clause], None), e.span())); + .map_with(|clause, e| (Fn("*anon", vec![clause], None), e.span())); let fn_clauses = fn_clause .clone() diff --git a/src/value.rs b/src/value.rs index 4cfbc79..caed0cb 100644 --- a/src/value.rs +++ b/src/value.rs @@ -41,6 +41,21 @@ pub enum Value { Fn(Rc>), } +impl std::fmt::Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use Value::*; + match self { + Nil => write!(f, "nil"), + True => write!(f, "true"), + False => write!(f, "false"), + Keyword(idx) => write!(f, ":{idx}"), + Interned(idx) => write!(f, "\"@{idx}\""), + Number(n) => write!(f, "{n}"), + _ => todo!(), + } + } +} + impl Value { pub fn show(&self, ctx: &Chunk) -> String { use Value::*; diff --git a/src/vm.rs b/src/vm.rs index 193bcea..9d1711b 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -44,20 +44,35 @@ impl<'a> Vm<'a> { } pub fn 
push(&mut self, value: Value) { - println!("{:04} pushing {value:?}", self.ip); self.stack.push(value); } pub fn pop(&mut self) -> Value { - let value = self.stack.pop().unwrap(); - println!("{:04} popping {value:?}", self.ip); - value + self.stack.pop().unwrap() + } + + fn print_stack(&self) { + let inner = self + .stack + .iter() + .map(|val| val.to_string()) + .collect::>() + .join("|"); + println!("{:04}: [{inner}] {}", self.ip, self.return_register); + } + + fn print_debug(&self) { + self.chunk.dissasemble_instr(self.ip); + self.print_stack(); } pub fn interpret(&mut self) -> Result { let Some(byte) = self.chunk.bytecode.get(self.ip) else { return Ok(self.stack.pop().unwrap()); }; + if crate::DEBUG_RUN { + self.print_debug(); + } let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { @@ -107,7 +122,16 @@ impl<'a> Vm<'a> { } Load => { let mut value = Value::Nil; + // println!( + // "before swap, return register holds: {}", + // self.return_register + // ); swap(&mut self.return_register, &mut value); + // println!( + // "before swap, return register holds: {}", + // self.return_register + // ); + // println!("now local value holds {value}"); self.push(value); self.ip += 1; self.interpret()