diff --git a/Cargo.toml b/Cargo.toml index ff13e36..2d3722c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,3 +17,4 @@ ordered-float = "4.5.0" index_vec = "0.1.4" num-derive = "0.4.2" num-traits = "0.2.19" +regex = "1.11.1" diff --git a/src/compiler.rs b/src/compiler.rs index 5e73c2e..69e9fa5 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -5,7 +5,7 @@ use crate::value::*; use chumsky::prelude::SimpleSpan; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::FromPrimitive; -use std::borrow::Borrow; +use regex::Regex; use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; @@ -35,6 +35,8 @@ pub enum Op { MatchFalse, PanicIfNoMatch, MatchConstant, + MatchString, + PushStringMatches, MatchType, MatchTuple, PushTuple, @@ -153,6 +155,8 @@ impl std::fmt::Display for Op { ResetMatch => "reset_match", PanicIfNoMatch => "panic_if_no_match", MatchConstant => "match_constant", + MatchString => "match_string", + PushStringMatches => "push_string_matches", MatchType => "match_type", MatchTuple => "match_tuple", PushTuple => "push_tuple", @@ -223,12 +227,18 @@ pub struct Upvalue { stack_pos: usize, } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] +pub struct StrPattern { + pub words: Vec, + pub re: Regex, +} + +#[derive(Clone, Debug)] pub struct Chunk { pub constants: Vec, pub bytecode: Vec, - pub strings: Vec<&'static str>, pub keywords: Vec<&'static str>, + pub string_patterns: Vec, } impl Chunk { @@ -253,7 +263,7 @@ impl Chunk { PushBinding | MatchTuple | MatchList | MatchDict | LoadDictValue | PushTuple | PushBox | Jump | JumpIfFalse | JumpIfTrue | JumpIfNoMatch | JumpIfMatch | JumpBack | JumpIfZero | MatchDepth | PopN | StoreAt | Call | SetUpvalue - | GetUpvalue | Partial => { + | GetUpvalue | Partial | MatchString | PushStringMatches => { let next = self.bytecode[*i + 1]; println!("{i:04}: {:16} {next:03}", op.to_string()); *i += 1; @@ -311,7 +321,7 @@ fn get_builtin(name: &str, arity: usize) -> Option { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct Compiler<'a> { pub chunk: Chunk, pub bindings: Vec, @@ -353,10 +363,8 @@ impl<'a> Compiler<'a> { let chunk = Chunk { constants: vec![], bytecode: vec![], - strings: vec![], - keywords: vec![ - "nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn", - ], + keywords: vec![], + string_patterns: vec![], }; Compiler { chunk, @@ -511,7 +519,6 @@ impl<'a> Compiler<'a> { // } fn pop(&mut self) { - println!("Popping from: {}", self.ast); self.emit_op(Op::Pop); self.stack_depth -= 1; } @@ -694,14 +701,14 @@ impl<'a> Compiler<'a> { let jump_idx = self.len(); self.emit_byte(0xff); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } for _ in 0..members.len() { self.emit_op(Op::Pop); } self.chunk.bytecode[before_load_tup_idx] = - self.len() as u8 - before_load_tup_idx as u8 - 1; - self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; + (self.len() - before_load_tup_idx) as u8 - 1; + self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1; } ListPattern(members) => { self.emit_op(Op::MatchList); @@ -726,14 +733,14 @@ impl<'a> Compiler<'a> { let jump_idx = self.len(); self.emit_byte(0xff); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } for _ in 0..members.len() { self.emit_op(Op::Pop); } self.chunk.bytecode[before_load_list_idx] = - self.len() as u8 - before_load_list_idx as u8 - 1; - self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; + (self.len() - before_load_list_idx) as u8 - 1; + self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1; } DictPattern(pairs) => { self.emit_op(Op::MatchDict); @@ -759,14 +766,66 @@ impl<'a> Compiler<'a> { let jump_idx = self.len(); self.emit_byte(0xff); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } for _ in 0..pairs.len() { self.emit_op(Op::Pop); } self.chunk.bytecode[before_load_dict_idx] = - self.len() as u8 - before_load_dict_idx as u8 - 1; - self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; + (self.len() - before_load_dict_idx) as u8 - 1; + self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1; + } + Splattern(..) => { + todo!() + } + InterpolatedPattern(parts, _) => { + println!("An interpolated pattern of {} parts", parts.len()); + let mut pattern = "".to_string(); + let mut words = vec![]; + for (part, _) in parts { + match part { + StringPart::Word(word) => { + println!("wordpart: {word}"); + words.push(word.clone()); + pattern.push_str("(.*)"); + } + StringPart::Data(data) => { + println!("datapart: {data}"); + let data = regex::escape(data); + pattern.push_str(data.as_str()); + } + StringPart::Inline(..) => unreachable!(), + } + } + let re = Regex::new(pattern.as_str()).unwrap(); + let moar_words = words.clone(); + let string_pattern = StrPattern { words, re }; + + let pattern_idx = self.chunk.string_patterns.len(); + self.chunk.string_patterns.push(string_pattern); + + self.emit_op(Op::MatchString); + self.emit_byte(pattern_idx); + + self.emit_op(Op::JumpIfNoMatch); + let jnm_idx = self.len(); + self.emit_byte(0xff); + + self.emit_op(Op::PushStringMatches); + self.emit_byte(pattern_idx); + + for word in moar_words { + let name: &'static str = std::string::String::leak(word); + let binding = Binding { + name, + depth: self.scope_depth, + stack_pos: self.stack_depth, + }; + self.bindings.push(binding); + self.stack_depth += 1; + } + + self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx - 1) as u8; } PairPattern(_, _) => unreachable!(), Tuple(members) => { @@ -938,11 +997,11 @@ impl<'a> Compiler<'a> { self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); - self.chunk.bytecode[jif_jump_idx] = self.len() as u8 - jif_jump_idx as u8 - 1; + self.chunk.bytecode[jif_jump_idx] = (self.len() - jif_jump_idx) as u8 - 1; } self.emit_op(Op::PanicNoWhen); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } self.stack_depth += 1; } @@ -986,12 +1045,12 @@ impl<'a> Compiler<'a> { jump_idxes.push(self.len()); self.emit_byte(0xff); for idx in no_match_jumps { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } } self.emit_op(Op::PanicNoMatch); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } while self.stack_depth > stack_depth { self.pop(); @@ -1214,12 +1273,12 @@ impl<'a> Compiler<'a> { let jump_idx = self.len(); self.emit_byte(0xff); for idx in tup_jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 2; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 2; } for _ in 0..arity { self.emit_op(Op::Pop); } - self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; + self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1; self.emit_op(Op::JumpIfNoMatch); let jnm_idx = self.len(); self.emit_byte(0xff); @@ -1240,12 +1299,12 @@ impl<'a> Compiler<'a> { self.emit_op(Op::Jump); jump_idxes.push(self.len()); self.emit_byte(0xff); - self.chunk.bytecode[jnm_idx] = self.len() as u8 - jnm_idx as u8; + self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx) as u8; self.scope_depth -= 1; } self.emit_op(Op::PanicNoMatch); for idx in jump_idxes { - self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; + self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1; } self.emit_op(Op::PopN); self.emit_byte(arity); @@ -1305,10 +1364,7 @@ impl<'a> Compiler<'a> { Placeholder => { self.emit_op(Op::Nothing); } - Arguments(..) | Placeholder | InterpolatedPattern(..) | Splattern(..) => { - todo!() - } - And | Or => unreachable!(), + And | Or | Arguments(..) => unreachable!(), } } diff --git a/src/main.rs b/src/main.rs index cb02108..1a16c27 100644 --- a/src/main.rs +++ b/src/main.rs @@ -74,12 +74,19 @@ pub fn run(src: &'static str) { } pub fn main() { - env::set_var("RUST_BACKTRACE", "1"); + // env::set_var("RUST_BACKTRACE", "1"); let src = " - fn add2 (x, y) -> add (x, y) +let x = { + match #{:a 1, :b 2, :c 3} with { + #{a} -> :one + #{a, b, :c 3} -> :two + #{a, b, c} -> :three + (1, 2, 3) -> :thing + (4, 5, (6, 7, a)) -> if or (true, false, false, true) then :thing_1 else :thing_2 + ([:a, :b, :c, [:d, [:e, (:f, :g)]]]) -> if or (true, false, false, true) then :thing_1 else :thing_2 + } +} - add2 (_, 2) (2) - - "; + "; run(src); } diff --git a/src/value.rs b/src/value.rs index ba13b28..190f343 100644 --- a/src/value.rs +++ b/src/value.rs @@ -6,7 +6,7 @@ use imbl::{HashMap, Vector}; use std::cell::RefCell; use std::rc::Rc; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub enum LFn { Declared { name: &'static str, @@ -107,7 +107,7 @@ impl PartialEq for Value { (List(x), List(y)) => x == y, (Dict(x), Dict(y)) => x == y, (Box(x), Box(y)) => std::ptr::eq(x.as_ref().as_ptr(), y.as_ref().as_ptr()), - (Fn(x), Fn(y)) => x == y, + (Fn(x), Fn(y)) => std::ptr::eq(x, y), (BaseFn(x), BaseFn(y)) => std::ptr::eq(x, y), (Partial(x), Partial(y)) => x == y, _ => false, diff --git a/src/vm.rs b/src/vm.rs index e284908..2d6cbd9 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -342,6 +342,43 @@ impl Vm { self.matches = self.stack[idx] == self.chunk().constants[const_idx as usize]; self.ip += 2; } + MatchString => { + let pattern_idx = self.chunk().bytecode[self.ip + 1]; + self.ip += 2; + let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1; + let scrutinee = self.stack[scrutinee_idx].clone(); + self.matches = match scrutinee { + Value::String(str) => self.chunk().string_patterns[pattern_idx as usize] + .re + .is_match(str.as_str()), + Value::Interned(str) => self.chunk().string_patterns[pattern_idx as usize] + .re + .is_match(str), + _ => false, + }; + } + PushStringMatches => { + let pattern_idx = self.chunk().bytecode[self.ip + 1]; + self.ip += 2; + let pattern_len = self.chunk().string_patterns[pattern_idx as usize] + .words + .len(); + let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1; + let scrutinee = self.stack[scrutinee_idx].clone(); + let scrutinee = match scrutinee { + Value::String(str) => str.as_ref().clone(), + Value::Interned(str) => str.to_string(), + _ => unreachable!(), + }; + let captures = self.chunk().string_patterns[pattern_idx as usize] + .re + .captures(scrutinee.as_str()) + .unwrap(); + for cap in 0..pattern_len { + self.push(Value::String(Rc::new(captures[cap + 1].to_string()))) + } + self.match_depth += pattern_len as u8; + } MatchTuple => { let idx = self.stack.len() - self.match_depth as usize - 1; let tuple_len = self.chunk().bytecode[self.ip + 1];