From d943185db83780736c4e3811c5fa05a7fe34063d Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Sun, 22 Dec 2024 19:07:42 -0500 Subject: [PATCH] do lots of work --- bytecode_thoughts.md | 1 + src/compiler.rs | 236 ++++++++++++++++++++++++++++++++++++++----- src/main.rs | 17 ++-- src/value.rs | 41 ++++++-- src/vm.rs | 148 +++++++++++++++++++++++---- 5 files changed, 382 insertions(+), 61 deletions(-) diff --git a/bytecode_thoughts.md b/bytecode_thoughts.md index 89ef94c..4387aee 100644 --- a/bytecode_thoughts.md +++ b/bytecode_thoughts.md @@ -131,3 +131,4 @@ That's probably the thing to do. Jesus, Scott. And **another** thing worth internalizing: every single instruction that's not an explicit push or pop should leave the stack length unchanged. So store and load need always to swap in a `nil` + diff --git a/src/compiler.rs b/src/compiler.rs index b3f5df3..86208cc 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -7,6 +7,9 @@ use num_traits::FromPrimitive; #[derive(Copy, Clone, Debug, PartialEq, Eq, FromPrimitive, ToPrimitive)] pub enum Op { + Nil, + True, + False, Constant, Jump, JumpIfFalse, @@ -14,20 +17,52 @@ pub enum Op { PushBinding, Store, Load, + ResetMatch, + MatchNil, + MatchTrue, + MatchFalse, + MatchWord, + PanicIfNoMatch, + MatchConstant, + MatchTuple, + PushTuple, + PushList, + PushDict, + PushBox, + GetKey, + PanicNoWhen, } impl std::fmt::Display for Op { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use Op::*; - match self { - Constant => write!(f, "constant"), - Jump => write!(f, "jump"), - JumpIfFalse => write!(f, "jump_if_false"), - Pop => write!(f, "pop"), - PushBinding => write!(f, "push_binding"), - Store => write!(f, "store"), - Load => write!(f, "load"), - } + let rep = match self { + Nil => "nil", + True => "true", + False => "false", + Constant => "constant", + Jump => "jump", + JumpIfFalse => "jump_if_false", + Pop => "pop", + PushBinding => "push_binding", + Store => "store", + Load => "load", + MatchNil => "match_nil", + MatchTrue => "match_true", + MatchFalse => "match_false", + MatchWord => "match_word", + ResetMatch => "reset_match", + PanicIfNoMatch => "panic_if_no_match", + MatchConstant => "match_constant", + MatchTuple => "match_tuple", + PushTuple => "push_tuple", + PushList => "push_list", + PushDict => "push_dict", + PushBox => "push_box", + GetKey => "get_key", + PanicNoWhen => "panic_no_when", + }; + write!(f, "{rep}") } } @@ -86,10 +121,11 @@ impl<'a> Chunk<'a> { } pub fn kw_from(&self, kw: &str) -> Option { - self.keywords - .iter() - .position(|s| *s == kw) - .map(Value::Keyword) + self.kw_index_from(kw).map(Value::Keyword) + } + + pub fn kw_index_from(&self, kw: &str) -> Option { + self.keywords.iter().position(|s| *s == kw) } pub fn visit(&mut self, node: &'a Spanned) { @@ -118,6 +154,27 @@ impl<'a> Chunk<'a> { self.spans.push(self.span); } + fn match_constant(&mut self, val: Value) { + let constant_index = match self.constants.iter().position(|v| *v == val) { + Some(idx) => idx, + None => self.constants.len(), + }; + if constant_index > u8::MAX as usize { + panic!( + "internal Ludus compiler error: too many constants in chunk:{}:: {}", + self.span, self.ast + ) + } + if constant_index == self.constants.len() { + self.constants.push(val); + } + self.bytecode.push(Op::MatchConstant as u8); + self.spans.push(self.span); + self.bytecode.push(constant_index as u8); + self.spans.push(self.span); + self.bind("*constant"); + } + fn emit_op(&mut self, op: Op) { self.bytecode.push(op as u8); self.spans.push(self.span); @@ -133,11 +190,15 @@ impl<'a> Chunk<'a> { pub fn compile(&mut self) { use Ast::*; match self.ast { - Nil => self.emit_constant(Value::Nil), + Nil => self.emit_op(Op::Nil), Number(n) => self.emit_constant(Value::Number(*n)), - Boolean(b) => self.emit_constant(if *b { Value::True } else { Value::False }), + Boolean(b) => self.emit_op(if *b { Op::True } else { Op::False }), String(s) => { - let str_index = self.strings.len(); + let existing_str = self.strings.iter().position(|e| e == s); + let str_index = match existing_str { + Some(idx) => idx, + None => self.strings.len(), + }; self.strings.push(s); self.emit_constant(Value::Interned(str_index)); } @@ -147,7 +208,9 @@ impl<'a> Chunk<'a> { Some(index) => index, None => self.keywords.len(), }; - self.keywords.push(s); + if kw_index == self.keywords.len() { + self.keywords.push(s); + } self.emit_constant(Value::Keyword(kw_index)); } Block(lines) => { @@ -190,10 +253,13 @@ impl<'a> Chunk<'a> { self.bytecode[jump_idx + 1] = jump_offset as u8; } Let(patt, expr) => { + self.emit_op(Op::ResetMatch); self.visit(expr); self.visit(patt); + self.emit_op(Op::PanicIfNoMatch); } WordPattern(name) => { + self.emit_op(Op::MatchWord); self.bind(name); } Word(name) => { @@ -207,8 +273,126 @@ impl<'a> Chunk<'a> { } } PlaceholderPattern => { + self.emit_op(Op::MatchWord); self.bind("_"); } + NilPattern => { + self.emit_op(Op::MatchNil); + self.bind("nil"); + } + BooleanPattern(b) => { + if *b { + self.emit_op(Op::MatchTrue); + self.bind("true"); + } else { + self.emit_op(Op::MatchFalse); + self.bind("false"); + } + } + NumberPattern(n) => { + self.match_constant(Value::Number(*n)); + } + KeywordPattern(s) => { + let existing_kw = self.keywords.iter().position(|kw| kw == s); + let kw_index = match existing_kw { + Some(index) => index, + None => self.keywords.len(), + }; + if kw_index == self.keywords.len() { + self.keywords.push(s); + } + self.match_constant(Value::Keyword(kw_index)); + } + StringPattern(s) => { + let existing_str = self.strings.iter().position(|e| e == s); + let str_index = match existing_str { + Some(idx) => idx, + None => self.strings.len(), + }; + if str_index == self.strings.len() { + self.strings.push(s) + } + self.match_constant(Value::Interned(str_index)); + } + Tuple(members) => { + for member in members { + self.visit(member); + } + self.emit_op(Op::PushTuple); + self.bytecode.push(members.len() as u8); + } + List(members) => { + for member in members { + self.visit(member); + } + self.emit_op(Op::PushList); + self.bytecode.push(members.len() as u8); + } + LBox(name, expr) => { + self.visit(expr); + self.emit_op(Op::PushBox); + self.bind(name); + } + Dict(pairs) => { + for pair in pairs { + self.visit(pair); + } + self.emit_op(Op::PushDict); + self.bytecode.push(pairs.len() as u8); + } + Pair(key, value) => { + let existing_kw = self.keywords.iter().position(|kw| kw == key); + let kw_index = match existing_kw { + Some(index) => index, + None => self.keywords.len(), + }; + if kw_index == self.keywords.len() { + self.keywords.push(key); + } + self.emit_constant(Value::Keyword(kw_index)); + self.visit(value); + } + Synthetic(first, second, rest) => { + match (&first.0, &second.0) { + (Word(_), Keyword(_)) => { + self.visit(first); + self.visit(second); + self.emit_op(Op::GetKey); + } + (Keyword(_), Arguments(args)) => { + self.visit(&args[0]); + self.visit(first); + self.emit_op(Op::GetKey); + } + (Word(_), Arguments(_)) => { + todo!() + } + _ => unreachable!(), + } + for term in rest { + todo!() + } + } + When(clauses) => { + let mut jump_idxes = vec![]; + let mut clauses = clauses.iter(); + while let Some((WhenClause(cond, body), _)) = clauses.next() { + self.visit(cond.as_ref()); + self.emit_op(Op::JumpIfFalse); + let jif_jump_idx = self.bytecode.len(); + self.bytecode.push(0xff); + self.visit(body); + self.emit_op(Op::Jump); + jump_idxes.push(self.bytecode.len()); + self.bytecode.push(0xff); + self.bytecode[jif_jump_idx] = + self.bytecode.len() as u8 - jif_jump_idx as u8 - 1; + } + self.emit_op(Op::PanicNoWhen); + for idx in jump_idxes { + self.bytecode[idx] = self.bytecode.len() as u8 - idx as u8 + 1; + } + } _ => todo!(), } } @@ -221,13 +405,16 @@ impl<'a> Chunk<'a> { let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { - Pop | Store | Load => println!("{i:04}: {op}"), - Constant => { + Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse + | MatchWord | ResetMatch | PanicIfNoMatch | GetKey | PanicNoWhen => { + println!("{i:04}: {op}") + } + Constant | MatchConstant => { let (_, next) = codes.next().unwrap(); let value = &self.constants[*next as usize].show(self); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } - PushBinding => { + PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox => { let (_, next) = codes.next().unwrap(); println!("{i:04}: {:16} {next:04}", op.to_string()); } @@ -243,13 +430,16 @@ impl<'a> Chunk<'a> { let op = Op::from_u8(self.bytecode[i]).unwrap(); use Op::*; match op { - Pop | Store | Load => println!("{i:04}: {op}"), - Constant => { + Pop | Store | Load | Nil | True | False | MatchNil | MatchTrue | MatchFalse + | PanicIfNoMatch | MatchWord | ResetMatch | GetKey | PanicNoWhen => { + println!("{i:04}: {op}") + } + Constant | MatchConstant => { let next = self.bytecode[i + 1]; let value = &self.constants[next as usize].show(self); println!("{i:04}: {:16} {next:04}: {value}", op.to_string()); } - PushBinding => { + PushBinding | MatchTuple | PushTuple | PushDict | PushList | PushBox => { let next = self.bytecode[i + 1]; println!("{i:04}: {:16} {next:04}", op.to_string()); } diff --git a/src/main.rs b/src/main.rs index 9fda65d..433f171 100644 --- a/src/main.rs +++ b/src/main.rs @@ -64,17 +64,12 @@ pub fn run(src: &'static str) { pub fn main() { let src = " -let foo = :let_foo - -let bar = if true - then { - let baz = :baz - } - else :whatever - -foo - -bar +let foo = 42 +when { + foo -> :two + false -> :four + :else -> :thing +} "; run(src); } diff --git a/src/value.rs b/src/value.rs index caed0cb..b51c71b 100644 --- a/src/value.rs +++ b/src/value.rs @@ -5,12 +5,6 @@ use imbl::{HashMap, Vector}; use std::cell::RefCell; use std::rc::Rc; -#[derive(Clone, Debug, PartialEq)] -pub struct LBox { - pub name: usize, - pub cell: RefCell, -} - #[derive(Clone, Debug, PartialEq)] pub struct LFn { pub name: &'static str, @@ -27,17 +21,15 @@ pub enum Value { Nil, True, False, - Keyword(usize), // use an idx, rather than a raw index + Keyword(usize), Interned(usize), FnDecl(usize), String(Rc), Number(f64), Tuple(Rc>), - TupleStart { len: u8, size: u16 }, - TupleEnd { len: u8, size: u16 }, List(Box>), Dict(Box>), - Box(Rc), + Box(Rc>), Fn(Rc>), } @@ -51,6 +43,34 @@ impl std::fmt::Display for Value { Keyword(idx) => write!(f, ":{idx}"), Interned(idx) => write!(f, "\"@{idx}\""), Number(n) => write!(f, "{n}"), + Tuple(members) => write!( + f, + "({})", + members + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ), + List(members) => write!( + f, + "[{}]", + members + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ), + Dict(members) => write!( + f, + "#{{{}}}", + members + .iter() + .map(|(k, v)| format!("{k} {v}")) + .collect::>() + .join(", ") + ), + Box(value) => write!(f, "box {}", value.as_ref().borrow()), _ => todo!(), } } @@ -93,6 +113,7 @@ impl Value { format!("#{{{members}}}") } String(s) => s.as_ref().clone(), + Box(x) => format!("box {{ {} }}", x.as_ref().borrow().show(ctx)), _ => todo!(), } } diff --git a/src/vm.rs b/src/vm.rs index 9d1711b..3f28833 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -3,18 +3,22 @@ use crate::parser::Ast; use crate::spans::Spanned; use crate::value::Value; use chumsky::prelude::SimpleSpan; +use imbl::{HashMap, Vector}; use num_traits::FromPrimitive; +use std::cell::RefCell; use std::mem::swap; +use std::rc::Rc; #[derive(Debug, Clone, PartialEq)] -pub struct Panic { - pub input: &'static str, - pub src: &'static str, - pub msg: String, - pub span: SimpleSpan, - pub trace: Vec, - pub extra: String, -} +// pub struct Panic { +// pub input: &'static str, +// pub src: &'static str, +// pub msg: String, +// pub span: SimpleSpan, +// pub trace: Vec, +// pub extra: String, +// } +pub struct Panic(&'static str); #[derive(Debug, Clone, PartialEq)] pub struct Trace { @@ -31,6 +35,7 @@ pub struct Vm<'a> { pub chunk: &'a Chunk<'a>, pub ip: usize, pub return_register: Value, + pub matches: bool, } impl<'a> Vm<'a> { @@ -40,6 +45,7 @@ impl<'a> Vm<'a> { stack: vec![], ip: 0, return_register: Value::Nil, + matches: false, } } @@ -76,6 +82,21 @@ impl<'a> Vm<'a> { let op = Op::from_u8(*byte).unwrap(); use Op::*; match op { + Nil => { + self.push(Value::Nil); + self.ip += 1; + self.interpret() + } + True => { + self.push(Value::True); + self.ip += 1; + self.interpret() + } + False => { + self.push(Value::False); + self.ip += 1; + self.interpret() + } Constant => { let const_idx = self.chunk.bytecode[self.ip + 1]; let value = self.chunk.constants[const_idx as usize].clone(); @@ -122,20 +143,113 @@ impl<'a> Vm<'a> { } Load => { let mut value = Value::Nil; - // println!( - // "before swap, return register holds: {}", - // self.return_register - // ); swap(&mut self.return_register, &mut value); - // println!( - // "before swap, return register holds: {}", - // self.return_register - // ); - // println!("now local value holds {value}"); self.push(value); self.ip += 1; self.interpret() } + ResetMatch => { + self.matches = false; + self.ip += 1; + self.interpret() + } + MatchWord => { + self.matches = true; + self.ip += 1; + self.interpret() + } + MatchNil => { + if *self.stack.last().unwrap() == Value::Nil { + self.matches = true; + }; + self.ip += 1; + self.interpret() + } + MatchTrue => { + if *self.stack.last().unwrap() == Value::True { + self.matches = true; + }; + self.ip += 1; + self.interpret() + } + MatchFalse => { + if *self.stack.last().unwrap() == Value::False { + self.matches = true; + } + self.ip += 1; + self.interpret() + } + PanicIfNoMatch => { + if !self.matches { + Err(Panic("no match")) + } else { + self.ip += 1; + self.interpret() + } + } + MatchConstant => { + let const_idx = self.chunk.bytecode[self.ip + 1]; + let value = self.stack.last().unwrap(); + self.matches = *value == self.chunk.constants[const_idx as usize]; + self.ip += 2; + self.interpret() + } + PushTuple => { + let tuple_len = self.chunk.bytecode[self.ip + 1]; + let tuple_members = self.stack.split_off(self.stack.len() - tuple_len as usize); + let tuple = Value::Tuple(Rc::new(tuple_members)); + self.stack.push(tuple); + self.ip += 2; + self.interpret() + } + PushList => { + let list_len = self.chunk.bytecode[self.ip + 1]; + let list_members = self.stack.split_off(self.stack.len() - list_len as usize); + let list = Value::List(Box::new(Vector::from(list_members))); + self.stack.push(list); + self.ip += 2; + self.interpret() + } + PushDict => { + let dict_len = self.chunk.bytecode[self.ip + 1] as usize * 2; + let dict_members = self.stack.split_off(self.stack.len() - dict_len); + let mut dict = HashMap::new(); + let mut dict_iter = dict_members.iter(); + while let Some(kw) = dict_iter.next() { + let Value::Keyword(key) = kw else { + unreachable!() + }; + let value = dict_iter.next().unwrap(); + dict.insert(*key, value.clone()); + } + self.stack.push(Value::Dict(Box::new(dict))); + self.ip += 2; + self.interpret() + } + PushBox => { + let val = self.pop(); + self.stack.push(Value::Box(Rc::new(RefCell::new(val)))); + self.ip += 1; + self.interpret() + } + GetKey => { + let key = self.pop(); + let Value::Keyword(idx) = key else { + unreachable!() + }; + let dict = self.pop(); + let value = match dict { + Value::Dict(d) => d.as_ref().get(&idx).unwrap_or(&Value::Nil).clone(), + _ => Value::Nil, + }; + self.push(value); + self.ip += 1; + self.interpret() + } + MatchTuple => { + todo!() + } + PanicNoWhen => Err(Panic("no match")), } } }