first draft of complex string matching, discover jump mistake

This commit is contained in:
Scott Richmond 2025-06-05 23:26:42 -04:00
parent 77faf67191
commit 0347d10db7
5 changed files with 139 additions and 38 deletions

View File

@ -17,3 +17,4 @@ ordered-float = "4.5.0"
index_vec = "0.1.4" index_vec = "0.1.4"
num-derive = "0.4.2" num-derive = "0.4.2"
num-traits = "0.2.19" num-traits = "0.2.19"
regex = "1.11.1"

View File

@ -5,7 +5,7 @@ use crate::value::*;
use chumsky::prelude::SimpleSpan; use chumsky::prelude::SimpleSpan;
use num_derive::{FromPrimitive, ToPrimitive}; use num_derive::{FromPrimitive, ToPrimitive};
use num_traits::FromPrimitive; use num_traits::FromPrimitive;
use std::borrow::Borrow; use regex::Regex;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
use std::rc::Rc; use std::rc::Rc;
@ -35,6 +35,8 @@ pub enum Op {
MatchFalse, MatchFalse,
PanicIfNoMatch, PanicIfNoMatch,
MatchConstant, MatchConstant,
MatchString,
PushStringMatches,
MatchType, MatchType,
MatchTuple, MatchTuple,
PushTuple, PushTuple,
@ -153,6 +155,8 @@ impl std::fmt::Display for Op {
ResetMatch => "reset_match", ResetMatch => "reset_match",
PanicIfNoMatch => "panic_if_no_match", PanicIfNoMatch => "panic_if_no_match",
MatchConstant => "match_constant", MatchConstant => "match_constant",
MatchString => "match_string",
PushStringMatches => "push_string_matches",
MatchType => "match_type", MatchType => "match_type",
MatchTuple => "match_tuple", MatchTuple => "match_tuple",
PushTuple => "push_tuple", PushTuple => "push_tuple",
@ -223,12 +227,18 @@ pub struct Upvalue {
stack_pos: usize, stack_pos: usize,
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug)]
/// A compiled interpolated-string pattern, stored in `Chunk::string_patterns`
/// and referenced from bytecode by its index in that table.
pub struct StrPattern {
/// Names of the words interpolated into the pattern, in source order.
/// The compiler adds one `(.*)` capture group to `re` per entry, so
/// `words.len()` is also the number of values the VM pushes on a match.
pub words: Vec<String>,
/// Regex built by the compiler from the pattern's parts: literal data is
/// escaped with `regex::escape`, each word becomes a `(.*)` group.
pub re: Regex,
}
#[derive(Clone, Debug)]
pub struct Chunk { pub struct Chunk {
pub constants: Vec<Value>, pub constants: Vec<Value>,
pub bytecode: Vec<u8>, pub bytecode: Vec<u8>,
pub strings: Vec<&'static str>,
pub keywords: Vec<&'static str>, pub keywords: Vec<&'static str>,
pub string_patterns: Vec<StrPattern>,
} }
impl Chunk { impl Chunk {
@ -253,7 +263,7 @@ impl Chunk {
PushBinding | MatchTuple | MatchList | MatchDict | LoadDictValue | PushTuple PushBinding | MatchTuple | MatchList | MatchDict | LoadDictValue | PushTuple
| PushBox | Jump | JumpIfFalse | JumpIfTrue | JumpIfNoMatch | JumpIfMatch | PushBox | Jump | JumpIfFalse | JumpIfTrue | JumpIfNoMatch | JumpIfMatch
| JumpBack | JumpIfZero | MatchDepth | PopN | StoreAt | Call | SetUpvalue | JumpBack | JumpIfZero | MatchDepth | PopN | StoreAt | Call | SetUpvalue
| GetUpvalue | Partial => { | GetUpvalue | Partial | MatchString | PushStringMatches => {
let next = self.bytecode[*i + 1]; let next = self.bytecode[*i + 1];
println!("{i:04}: {:16} {next:03}", op.to_string()); println!("{i:04}: {:16} {next:03}", op.to_string());
*i += 1; *i += 1;
@ -311,7 +321,7 @@ fn get_builtin(name: &str, arity: usize) -> Option<Op> {
} }
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone)]
pub struct Compiler<'a> { pub struct Compiler<'a> {
pub chunk: Chunk, pub chunk: Chunk,
pub bindings: Vec<Binding>, pub bindings: Vec<Binding>,
@ -353,10 +363,8 @@ impl<'a> Compiler<'a> {
let chunk = Chunk { let chunk = Chunk {
constants: vec![], constants: vec![],
bytecode: vec![], bytecode: vec![],
strings: vec![], keywords: vec![],
keywords: vec![ string_patterns: vec![],
"nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn",
],
}; };
Compiler { Compiler {
chunk, chunk,
@ -511,7 +519,6 @@ impl<'a> Compiler<'a> {
// } // }
fn pop(&mut self) { fn pop(&mut self) {
println!("Popping from: {}", self.ast);
self.emit_op(Op::Pop); self.emit_op(Op::Pop);
self.stack_depth -= 1; self.stack_depth -= 1;
} }
@ -694,14 +701,14 @@ impl<'a> Compiler<'a> {
let jump_idx = self.len(); let jump_idx = self.len();
self.emit_byte(0xff); self.emit_byte(0xff);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
for _ in 0..members.len() { for _ in 0..members.len() {
self.emit_op(Op::Pop); self.emit_op(Op::Pop);
} }
self.chunk.bytecode[before_load_tup_idx] = self.chunk.bytecode[before_load_tup_idx] =
self.len() as u8 - before_load_tup_idx as u8 - 1; (self.len() - before_load_tup_idx) as u8 - 1;
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
} }
ListPattern(members) => { ListPattern(members) => {
self.emit_op(Op::MatchList); self.emit_op(Op::MatchList);
@ -726,14 +733,14 @@ impl<'a> Compiler<'a> {
let jump_idx = self.len(); let jump_idx = self.len();
self.emit_byte(0xff); self.emit_byte(0xff);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
for _ in 0..members.len() { for _ in 0..members.len() {
self.emit_op(Op::Pop); self.emit_op(Op::Pop);
} }
self.chunk.bytecode[before_load_list_idx] = self.chunk.bytecode[before_load_list_idx] =
self.len() as u8 - before_load_list_idx as u8 - 1; (self.len() - before_load_list_idx) as u8 - 1;
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
} }
DictPattern(pairs) => { DictPattern(pairs) => {
self.emit_op(Op::MatchDict); self.emit_op(Op::MatchDict);
@ -759,14 +766,66 @@ impl<'a> Compiler<'a> {
let jump_idx = self.len(); let jump_idx = self.len();
self.emit_byte(0xff); self.emit_byte(0xff);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
for _ in 0..pairs.len() { for _ in 0..pairs.len() {
self.emit_op(Op::Pop); self.emit_op(Op::Pop);
} }
self.chunk.bytecode[before_load_dict_idx] = self.chunk.bytecode[before_load_dict_idx] =
self.len() as u8 - before_load_dict_idx as u8 - 1; (self.len() - before_load_dict_idx) as u8 - 1;
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
}
Splattern(..) => {
todo!()
}
InterpolatedPattern(parts, _) => {
println!("An interpolated pattern of {} parts", parts.len());
let mut pattern = "".to_string();
let mut words = vec![];
for (part, _) in parts {
match part {
StringPart::Word(word) => {
println!("wordpart: {word}");
words.push(word.clone());
pattern.push_str("(.*)");
}
StringPart::Data(data) => {
println!("datapart: {data}");
let data = regex::escape(data);
pattern.push_str(data.as_str());
}
StringPart::Inline(..) => unreachable!(),
}
}
let re = Regex::new(pattern.as_str()).unwrap();
let moar_words = words.clone();
let string_pattern = StrPattern { words, re };
let pattern_idx = self.chunk.string_patterns.len();
self.chunk.string_patterns.push(string_pattern);
self.emit_op(Op::MatchString);
self.emit_byte(pattern_idx);
self.emit_op(Op::JumpIfNoMatch);
let jnm_idx = self.len();
self.emit_byte(0xff);
self.emit_op(Op::PushStringMatches);
self.emit_byte(pattern_idx);
for word in moar_words {
let name: &'static str = std::string::String::leak(word);
let binding = Binding {
name,
depth: self.scope_depth,
stack_pos: self.stack_depth,
};
self.bindings.push(binding);
self.stack_depth += 1;
}
self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx - 1) as u8;
} }
PairPattern(_, _) => unreachable!(), PairPattern(_, _) => unreachable!(),
Tuple(members) => { Tuple(members) => {
@ -938,11 +997,11 @@ impl<'a> Compiler<'a> {
self.emit_op(Op::Jump); self.emit_op(Op::Jump);
jump_idxes.push(self.len()); jump_idxes.push(self.len());
self.emit_byte(0xff); self.emit_byte(0xff);
self.chunk.bytecode[jif_jump_idx] = self.len() as u8 - jif_jump_idx as u8 - 1; self.chunk.bytecode[jif_jump_idx] = (self.len() - jif_jump_idx) as u8 - 1;
} }
self.emit_op(Op::PanicNoWhen); self.emit_op(Op::PanicNoWhen);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
self.stack_depth += 1; self.stack_depth += 1;
} }
@ -986,12 +1045,12 @@ impl<'a> Compiler<'a> {
jump_idxes.push(self.len()); jump_idxes.push(self.len());
self.emit_byte(0xff); self.emit_byte(0xff);
for idx in no_match_jumps { for idx in no_match_jumps {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
} }
self.emit_op(Op::PanicNoMatch); self.emit_op(Op::PanicNoMatch);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
while self.stack_depth > stack_depth { while self.stack_depth > stack_depth {
self.pop(); self.pop();
@ -1214,12 +1273,12 @@ impl<'a> Compiler<'a> {
let jump_idx = self.len(); let jump_idx = self.len();
self.emit_byte(0xff); self.emit_byte(0xff);
for idx in tup_jump_idxes { for idx in tup_jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 2; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 2;
} }
for _ in 0..arity { for _ in 0..arity {
self.emit_op(Op::Pop); self.emit_op(Op::Pop);
} }
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1; self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
self.emit_op(Op::JumpIfNoMatch); self.emit_op(Op::JumpIfNoMatch);
let jnm_idx = self.len(); let jnm_idx = self.len();
self.emit_byte(0xff); self.emit_byte(0xff);
@ -1240,12 +1299,12 @@ impl<'a> Compiler<'a> {
self.emit_op(Op::Jump); self.emit_op(Op::Jump);
jump_idxes.push(self.len()); jump_idxes.push(self.len());
self.emit_byte(0xff); self.emit_byte(0xff);
self.chunk.bytecode[jnm_idx] = self.len() as u8 - jnm_idx as u8; self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx) as u8;
self.scope_depth -= 1; self.scope_depth -= 1;
} }
self.emit_op(Op::PanicNoMatch); self.emit_op(Op::PanicNoMatch);
for idx in jump_idxes { for idx in jump_idxes {
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1; self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
} }
self.emit_op(Op::PopN); self.emit_op(Op::PopN);
self.emit_byte(arity); self.emit_byte(arity);
@ -1305,10 +1364,7 @@ impl<'a> Compiler<'a> {
Placeholder => { Placeholder => {
self.emit_op(Op::Nothing); self.emit_op(Op::Nothing);
} }
Arguments(..) | Placeholder | InterpolatedPattern(..) | Splattern(..) => { And | Or | Arguments(..) => unreachable!(),
todo!()
}
And | Or => unreachable!(),
} }
} }

View File

@ -74,11 +74,18 @@ pub fn run(src: &'static str) {
} }
pub fn main() { pub fn main() {
env::set_var("RUST_BACKTRACE", "1"); // env::set_var("RUST_BACKTRACE", "1");
let src = " let src = "
fn add2 (x, y) -> add (x, y) let x = {
match #{:a 1, :b 2, :c 3} with {
add2 (_, 2) (2) #{a} -> :one
#{a, b, :c 3} -> :two
#{a, b, c} -> :three
(1, 2, 3) -> :thing
(4, 5, (6, 7, a)) -> if or (true, false, false, true) then :thing_1 else :thing_2
([:a, :b, :c, [:d, [:e, (:f, :g)]]]) -> if or (true, false, false, true) then :thing_1 else :thing_2
}
}
"; ";
run(src); run(src);

View File

@ -6,7 +6,7 @@ use imbl::{HashMap, Vector};
use std::cell::RefCell; use std::cell::RefCell;
use std::rc::Rc; use std::rc::Rc;
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug)]
pub enum LFn { pub enum LFn {
Declared { Declared {
name: &'static str, name: &'static str,
@ -107,7 +107,7 @@ impl PartialEq for Value {
(List(x), List(y)) => x == y, (List(x), List(y)) => x == y,
(Dict(x), Dict(y)) => x == y, (Dict(x), Dict(y)) => x == y,
(Box(x), Box(y)) => std::ptr::eq(x.as_ref().as_ptr(), y.as_ref().as_ptr()), (Box(x), Box(y)) => std::ptr::eq(x.as_ref().as_ptr(), y.as_ref().as_ptr()),
(Fn(x), Fn(y)) => x == y, (Fn(x), Fn(y)) => std::ptr::eq(x, y),
(BaseFn(x), BaseFn(y)) => std::ptr::eq(x, y), (BaseFn(x), BaseFn(y)) => std::ptr::eq(x, y),
(Partial(x), Partial(y)) => x == y, (Partial(x), Partial(y)) => x == y,
_ => false, _ => false,

View File

@ -342,6 +342,43 @@ impl Vm {
self.matches = self.stack[idx] == self.chunk().constants[const_idx as usize]; self.matches = self.stack[idx] == self.chunk().constants[const_idx as usize];
self.ip += 2; self.ip += 2;
} }
// MatchString <pattern_idx>: test the scrutinee against a compiled string
// pattern and record the outcome in `self.matches`. Pushes nothing.
MatchString => {
// The one-byte operand indexes into the chunk's `string_patterns` table.
let pattern_idx = self.chunk().bytecode[self.ip + 1];
// Step past the opcode and its operand.
self.ip += 2;
// The scrutinee sits `match_depth` slots below the top of the stack.
let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1;
let scrutinee = self.stack[scrutinee_idx].clone();
// Only the two string representations can match; any other value fails.
self.matches = match scrutinee {
Value::String(str) => self.chunk().string_patterns[pattern_idx as usize]
.re
.is_match(str.as_str()),
Value::Interned(str) => self.chunk().string_patterns[pattern_idx as usize]
.re
.is_match(str),
_ => false,
};
}
// PushStringMatches <pattern_idx>: re-run the pattern's regex on the
// scrutinee and push one string value per captured word.
// NOTE(review): the compiler emits this right after `MatchString` +
// `JumpIfNoMatch` with the same pattern index, so it presumably only runs
// when the match succeeded — the `unreachable!` and `unwrap` below rely on that.
PushStringMatches => {
// The one-byte operand indexes into the chunk's `string_patterns` table.
let pattern_idx = self.chunk().bytecode[self.ip + 1];
// Step past the opcode and its operand.
self.ip += 2;
// Number of capture groups to push: one per word in the pattern.
let pattern_len = self.chunk().string_patterns[pattern_idx as usize]
.words
.len();
// The scrutinee sits `match_depth` slots below the top of the stack.
let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1;
let scrutinee = self.stack[scrutinee_idx].clone();
// Copy the scrutinee's text into an owned string to run the regex on.
let scrutinee = match scrutinee {
Value::String(str) => str.as_ref().clone(),
Value::Interned(str) => str.to_string(),
_ => unreachable!(),
};
let captures = self.chunk().string_patterns[pattern_idx as usize]
.re
.captures(scrutinee.as_str())
.unwrap();
// Group 0 is the whole match; the words are groups 1..=pattern_len.
for cap in 0..pattern_len {
self.push(Value::String(Rc::new(captures[cap + 1].to_string())))
}
// NOTE(review): presumably bumped so the scrutinee offset computed from
// `match_depth` stays correct after the pushes above — confirm.
self.match_depth += pattern_len as u8;
}
MatchTuple => { MatchTuple => {
let idx = self.stack.len() - self.match_depth as usize - 1; let idx = self.stack.len() - self.match_depth as usize - 1;
let tuple_len = self.chunk().bytecode[self.ip + 1]; let tuple_len = self.chunk().bytecode[self.ip + 1];