diff --git a/bytecode_thoughts.md b/bytecode_thoughts.md new file mode 100644 index 0000000..1185b9d --- /dev/null +++ b/bytecode_thoughts.md @@ -0,0 +1,87 @@ +# Working notes on bytecode stuff + +### 2024-12-15 +So far, I've done the easy stuff: constants, and ifs. + +There's still some easy stuff left: +* [ ] lists +* [ ] dicts +* [ ] when +* [ ] panic + +So I'll do those next. + +But then we've got two doozies: patterns and bindings, and tuples. + +#### Tuples make things hard +In fact, it's tuples that make things hard. +The idea is that, when possible, tuples should be stored on the stack. +That makes them a different creature than anything else. +But the goal is to be able, in a function call, to just push a tuple onto the stack, and then match against it. +Because a tuple _isn't_ just another `Value`, that makes things challenging. +BUT: matching against all other `Values` should be straightforward enough? + +I think that the way to do this is to reify patterns. +Rather than try to emit bytecodes to embody patterns, the patterns are some kind of data that get compiled and pushed onto a stack like keywords and interned strings and whatnot. +And then you can push a pattern onto the stack right behind a value, and then have a `match` opcode that pops them off. + +Things get a bit gnarly since patterns can be nested. I'll start with the basic cases and run from there. + +But where things get *very* gnarly is in considering tuples on the stack. +How do you pop off a tuple? + +Two thoughts: +1. Just put tuples on the heap. And treat function arguments/matching differently. +2. Have a "register" that stages values to be pattern matched. + +##### Regarding the first option +I recall seeing somebody somewhere make a comment that trying to represent function arguments as tuples caused tons of pain. +I can see why that would be the case, from an implementation standpoint. +We should have _values_, and not do fancy bookkeeping if we don't have to. 
+ +_Conceptually_, it makes a great deal of sense to think of tuples as being deeply the same as function invocation. +But _practically_, they are different things, especially with Rust underneath. + +The first option (tuples on the heap) feels like it cuts along the grain, and so that is what I will try. + +I suspect that I'll end up specializing a lot around function arguments and calling, but that feels more tractable than the bookkeeping around stack-based tuples. + +### 2024-12-17 +Next thoughts: take some things systematically rather than choosing an approach first. + +#### Things that always match +* Placeholder. + - I _think_ this is just a no-op. A `let` expression leaves its rhs pushed on the stack. + +* Word: put something on the stack, and bind a name. + - This should follow the logic of locals as articulated in _Crafting Interpreters_. + +In both of these cases, there's no conditional logic, simply a bind. + +#### Things that never bind +* Atomic values: put the rhs on the stack, then do an equality check, and panic if it fails. Leave the thing on the stack. + +#### Analysis +In terms of bytecode, I think one thing to do, in the simple case, is to do the following: +* `push` a `pattern` onto the stack +* `match`--pops the pattern and the value off the stack, and then applies the pattern to the value. It leaves the value on the stack, and pushes a special value onto the stack representing a match, or not. + - We'll probably want `match-1`, `match-2`, `match-3`, etc., opcodes for matching a value that's that far back in the stack. E.g., `match-1` matches against not the top element, but the `top - 1` element. + - This is _specifically_ for matching function arguments and `loop` forms. +* There are a few different things we might do from here: + - `panic_if_no_match`: panic if the last thing is a `no_match`, or just keep going if not. 
+ - `jump_if_no_match`: in a `match` form or a function, we'll want to move to the next clause if there's no match, so jump to the next clause's `pattern` `push` code. +* Compound patterns are going to be more complex. + - I think, for example, what you're going to need to do is to get opcodes that work on our data structures, so, for example, when you have a `match_compound` opcode and you start digging into the pattern. +* Compound patterns are specifically _data structures_. So simple structures should be stack-allocated, and complex structures should be pointers to something on the heap. Maybe? + +#### A little note +For instructions that need more than 256 possibilities, we'll need to mush two `u8`s together into a `u16`. The one-liner for this is: + +```rust +let number = ((first as u16) << 8) | second as u16; +``` + +#### Oy, stacks and expressions +One thing that's giving me grief is when to pop and when to note on the value stack. + +Consider diff --git a/src/compiler.rs b/src/compiler.rs index 4b90399..4639dfd 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -7,35 +7,41 @@ use num_traits::FromPrimitive; #[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)] pub enum Op { - Return, Constant, Jump, JumpIfFalse, + Pop, + PushBinding, + Store, + Load, } impl std::fmt::Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use Op::*; match self { - Return => write!(f, "return"), Constant => write!(f, "constant"), Jump => write!(f, "jump"), JumpIfFalse => write!(f, "jump_if_false"), + Pop => write!(f, "pop"), + PushBinding => write!(f, "push_binding"), + Store => write!(f, "store"), + Load => write!(f, "load"), } } } #[derive(Clone, Debug, PartialEq)] -pub struct Local { +pub struct Binding { name: &'static str, - depth: u8, + depth: isize, } #[derive(Clone, Debug, PartialEq)] pub struct Chunk<'a> { - pub locals: Vec, - scope_depth: usize, - local_count: usize, + pub bindings: Vec, + scope_depth: isize, + 
num_bindings: usize, pub constants: Vec, pub bytecode: Vec, pub spans: Vec, @@ -51,9 +57,9 @@ pub struct Chunk<'a> { impl<'a> Chunk<'a> { pub fn new(ast: &'a Spanned, name: &'static str, src: &'static str) -> Chunk<'a> { Chunk { - locals: vec![], - scope_depth: 0, - local_count: 0, + bindings: vec![], + scope_depth: -1, + num_bindings: 0, constants: vec![], bytecode: vec![], spans: vec![], @@ -107,6 +113,15 @@ impl<'a> Chunk<'a> { self.spans.push(self.span); } + fn bind(&mut self, name: &'static str) { + println!("binding {name} at depth {}", self.scope_depth); + self.bindings.push(Binding { + name, + depth: self.scope_depth, + }); + println!("{:?}", self.bindings) + } + pub fn compile(&mut self) { use Ast::*; match self.ast { @@ -129,11 +144,22 @@ impl<'a> Chunk<'a> { } Block(lines) => { self.scope_depth += 1; + println!("now entering scope level {}", self.scope_depth); for expr in lines { self.visit(expr); + self.emit_op(Op::Pop); } - self.emit_op(Op::Return); + self.emit_op(Op::Store); self.scope_depth -= 1; + while let Some(binding) = self.bindings.last() { + if binding.depth > self.scope_depth { + self.emit_op(Op::Pop); + self.bindings.pop(); + } else { + break; + } + } + self.emit_op(Op::Load); } If(cond, then, r#else) => { self.visit(cond); @@ -151,12 +177,30 @@ impl<'a> Chunk<'a> { self.bytecode[jif_idx + 1] = jif_offset as u8; self.bytecode[jump_idx + 1] = jump_offset as u8; } - // Let(patt, expr) => { - // self.visit(expr); - // self.visit(patt); - // } - // WordPattern(name) => {} - // PlaceholderPattern => {} + Let(patt, expr) => { + println!("let binding!"); + self.visit(expr); + self.visit(patt); + } + WordPattern(name) => { + self.bind(name); + } + Word(name) => { + println!("resolving binding {name}"); + println!("current bindings {:?}", self.bindings); + self.emit_op(Op::PushBinding); + let biter = self.bindings.iter().enumerate().rev(); + for (i, binding) in biter { + println!("at index {i}"); + if binding.name == *name { + self.bytecode.push(i as 
u8); + break; + } + } + } + PlaceholderPattern => { + self.bind("_"); + } _ => todo!(), } } @@ -169,12 +213,16 @@ impl<'a> Chunk<'a> { let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { - Return => println!("{i:04}: {op}"), + Pop | Store | Load => println!("{i:04}: {op}"), Constant => { let (_, next) = codes.next().unwrap(); let value = &self.constants[*next as usize].show(self); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } + PushBinding => { + let (_, next) = codes.next().unwrap(); + println!("{i:04}: {:16} {next:04}", op.to_string()); + } Jump | JumpIfFalse => { let (_, next) = codes.next().unwrap(); println!("{i:04}: {:16} {next:04}", op.to_string()) diff --git a/src/main.rs b/src/main.rs index f38488b..cabcea9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,7 @@ use chumsky::{input::Stream, prelude::*}; +mod memory_sandbox; + mod spans; mod lexer; @@ -52,7 +54,9 @@ pub fn run(src: &'static str) { pub fn main() { let src = " -if false +let foo = :let_foo + +let bar = if true then { :foo :bar @@ -63,6 +67,10 @@ if false 2 3 } + +foo + +bar "; run(src); } diff --git a/src/vm.rs b/src/vm.rs index 33c4f6c..193bcea 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -4,6 +4,7 @@ use crate::spans::Spanned; use crate::value::Value; use chumsky::prelude::SimpleSpan; use num_traits::FromPrimitive; +use std::mem::swap; #[derive(Debug, Clone, PartialEq)] pub struct Panic { @@ -29,7 +30,7 @@ pub struct Vm<'a> { pub stack: Vec, pub chunk: &'a Chunk<'a>, pub ip: usize, - pub bindings: Vec<(u8, usize)>, + pub return_register: Value, } impl<'a> Vm<'a> { @@ -38,24 +39,28 @@ impl<'a> Vm<'a> { chunk, stack: vec![], ip: 0, - bindings: vec![], + return_register: Value::Nil, } } pub fn push(&mut self, value: Value) { + println!("{:04} pushing {value:?}", self.ip); self.stack.push(value); } pub fn pop(&mut self) -> Value { - self.stack.pop().unwrap() + let value = self.stack.pop().unwrap(); + println!("{:04} popping {value:?}", self.ip); + value } pub fn 
interpret(&mut self) -> Result { - let byte = self.chunk.bytecode[self.ip]; - let op = Op::from_u8(byte).unwrap(); + let Some(byte) = self.chunk.bytecode.get(self.ip) else { + return Ok(self.stack.pop().unwrap()); + }; + let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { - Return => Ok(self.stack.pop().unwrap()), Constant => { let const_idx = self.chunk.bytecode[self.ip + 1]; let value = self.chunk.constants[const_idx as usize].clone(); @@ -70,7 +75,7 @@ impl<'a> Vm<'a> { } JumpIfFalse => { let jump_len = self.chunk.bytecode[self.ip + 1]; - let cond = self.stack.pop().unwrap(); + let cond = self.pop(); match cond { Value::Nil | Value::False => { self.ip += jump_len as usize + 2; @@ -82,6 +87,31 @@ impl<'a> Vm<'a> { } } } + Pop => { + self.pop(); + self.ip += 1; + self.interpret() + } + PushBinding => { + let binding_idx = self.chunk.bytecode[self.ip + 1] as usize; + let binding_value = self.stack[binding_idx].clone(); + self.push(binding_value); + self.ip += 2; + self.interpret() + } + Store => { + self.return_register = self.pop(); + self.push(Value::Nil); + self.ip += 1; + self.interpret() + } + Load => { + let mut value = Value::Nil; + swap(&mut self.return_register, &mut value); + self.push(value); + self.ip += 1; + self.interpret() + } } } }