first draft of complex string matching, discover jump mistake
This commit is contained in:
parent
77faf67191
commit
0347d10db7
|
@ -17,3 +17,4 @@ ordered-float = "4.5.0"
|
||||||
index_vec = "0.1.4"
|
index_vec = "0.1.4"
|
||||||
num-derive = "0.4.2"
|
num-derive = "0.4.2"
|
||||||
num-traits = "0.2.19"
|
num-traits = "0.2.19"
|
||||||
|
regex = "1.11.1"
|
||||||
|
|
118
src/compiler.rs
118
src/compiler.rs
|
@ -5,7 +5,7 @@ use crate::value::*;
|
||||||
use chumsky::prelude::SimpleSpan;
|
use chumsky::prelude::SimpleSpan;
|
||||||
use num_derive::{FromPrimitive, ToPrimitive};
|
use num_derive::{FromPrimitive, ToPrimitive};
|
||||||
use num_traits::FromPrimitive;
|
use num_traits::FromPrimitive;
|
||||||
use std::borrow::Borrow;
|
use regex::Regex;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
@ -35,6 +35,8 @@ pub enum Op {
|
||||||
MatchFalse,
|
MatchFalse,
|
||||||
PanicIfNoMatch,
|
PanicIfNoMatch,
|
||||||
MatchConstant,
|
MatchConstant,
|
||||||
|
MatchString,
|
||||||
|
PushStringMatches,
|
||||||
MatchType,
|
MatchType,
|
||||||
MatchTuple,
|
MatchTuple,
|
||||||
PushTuple,
|
PushTuple,
|
||||||
|
@ -153,6 +155,8 @@ impl std::fmt::Display for Op {
|
||||||
ResetMatch => "reset_match",
|
ResetMatch => "reset_match",
|
||||||
PanicIfNoMatch => "panic_if_no_match",
|
PanicIfNoMatch => "panic_if_no_match",
|
||||||
MatchConstant => "match_constant",
|
MatchConstant => "match_constant",
|
||||||
|
MatchString => "match_string",
|
||||||
|
PushStringMatches => "push_string_matches",
|
||||||
MatchType => "match_type",
|
MatchType => "match_type",
|
||||||
MatchTuple => "match_tuple",
|
MatchTuple => "match_tuple",
|
||||||
PushTuple => "push_tuple",
|
PushTuple => "push_tuple",
|
||||||
|
@ -223,12 +227,18 @@ pub struct Upvalue {
|
||||||
stack_pos: usize,
|
stack_pos: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct StrPattern {
|
||||||
|
pub words: Vec<String>,
|
||||||
|
pub re: Regex,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
pub struct Chunk {
|
pub struct Chunk {
|
||||||
pub constants: Vec<Value>,
|
pub constants: Vec<Value>,
|
||||||
pub bytecode: Vec<u8>,
|
pub bytecode: Vec<u8>,
|
||||||
pub strings: Vec<&'static str>,
|
|
||||||
pub keywords: Vec<&'static str>,
|
pub keywords: Vec<&'static str>,
|
||||||
|
pub string_patterns: Vec<StrPattern>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Chunk {
|
impl Chunk {
|
||||||
|
@ -253,7 +263,7 @@ impl Chunk {
|
||||||
PushBinding | MatchTuple | MatchList | MatchDict | LoadDictValue | PushTuple
|
PushBinding | MatchTuple | MatchList | MatchDict | LoadDictValue | PushTuple
|
||||||
| PushBox | Jump | JumpIfFalse | JumpIfTrue | JumpIfNoMatch | JumpIfMatch
|
| PushBox | Jump | JumpIfFalse | JumpIfTrue | JumpIfNoMatch | JumpIfMatch
|
||||||
| JumpBack | JumpIfZero | MatchDepth | PopN | StoreAt | Call | SetUpvalue
|
| JumpBack | JumpIfZero | MatchDepth | PopN | StoreAt | Call | SetUpvalue
|
||||||
| GetUpvalue | Partial => {
|
| GetUpvalue | Partial | MatchString | PushStringMatches => {
|
||||||
let next = self.bytecode[*i + 1];
|
let next = self.bytecode[*i + 1];
|
||||||
println!("{i:04}: {:16} {next:03}", op.to_string());
|
println!("{i:04}: {:16} {next:03}", op.to_string());
|
||||||
*i += 1;
|
*i += 1;
|
||||||
|
@ -311,7 +321,7 @@ fn get_builtin(name: &str, arity: usize) -> Option<Op> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Compiler<'a> {
|
pub struct Compiler<'a> {
|
||||||
pub chunk: Chunk,
|
pub chunk: Chunk,
|
||||||
pub bindings: Vec<Binding>,
|
pub bindings: Vec<Binding>,
|
||||||
|
@ -353,10 +363,8 @@ impl<'a> Compiler<'a> {
|
||||||
let chunk = Chunk {
|
let chunk = Chunk {
|
||||||
constants: vec![],
|
constants: vec![],
|
||||||
bytecode: vec![],
|
bytecode: vec![],
|
||||||
strings: vec![],
|
keywords: vec![],
|
||||||
keywords: vec![
|
string_patterns: vec![],
|
||||||
"nil", "bool", "number", "keyword", "string", "tuple", "list", "dict", "box", "fn",
|
|
||||||
],
|
|
||||||
};
|
};
|
||||||
Compiler {
|
Compiler {
|
||||||
chunk,
|
chunk,
|
||||||
|
@ -511,7 +519,6 @@ impl<'a> Compiler<'a> {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
fn pop(&mut self) {
|
fn pop(&mut self) {
|
||||||
println!("Popping from: {}", self.ast);
|
|
||||||
self.emit_op(Op::Pop);
|
self.emit_op(Op::Pop);
|
||||||
self.stack_depth -= 1;
|
self.stack_depth -= 1;
|
||||||
}
|
}
|
||||||
|
@ -694,14 +701,14 @@ impl<'a> Compiler<'a> {
|
||||||
let jump_idx = self.len();
|
let jump_idx = self.len();
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
for _ in 0..members.len() {
|
for _ in 0..members.len() {
|
||||||
self.emit_op(Op::Pop);
|
self.emit_op(Op::Pop);
|
||||||
}
|
}
|
||||||
self.chunk.bytecode[before_load_tup_idx] =
|
self.chunk.bytecode[before_load_tup_idx] =
|
||||||
self.len() as u8 - before_load_tup_idx as u8 - 1;
|
(self.len() - before_load_tup_idx) as u8 - 1;
|
||||||
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1;
|
self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
ListPattern(members) => {
|
ListPattern(members) => {
|
||||||
self.emit_op(Op::MatchList);
|
self.emit_op(Op::MatchList);
|
||||||
|
@ -726,14 +733,14 @@ impl<'a> Compiler<'a> {
|
||||||
let jump_idx = self.len();
|
let jump_idx = self.len();
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
for _ in 0..members.len() {
|
for _ in 0..members.len() {
|
||||||
self.emit_op(Op::Pop);
|
self.emit_op(Op::Pop);
|
||||||
}
|
}
|
||||||
self.chunk.bytecode[before_load_list_idx] =
|
self.chunk.bytecode[before_load_list_idx] =
|
||||||
self.len() as u8 - before_load_list_idx as u8 - 1;
|
(self.len() - before_load_list_idx) as u8 - 1;
|
||||||
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1;
|
self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
DictPattern(pairs) => {
|
DictPattern(pairs) => {
|
||||||
self.emit_op(Op::MatchDict);
|
self.emit_op(Op::MatchDict);
|
||||||
|
@ -759,14 +766,66 @@ impl<'a> Compiler<'a> {
|
||||||
let jump_idx = self.len();
|
let jump_idx = self.len();
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
for _ in 0..pairs.len() {
|
for _ in 0..pairs.len() {
|
||||||
self.emit_op(Op::Pop);
|
self.emit_op(Op::Pop);
|
||||||
}
|
}
|
||||||
self.chunk.bytecode[before_load_dict_idx] =
|
self.chunk.bytecode[before_load_dict_idx] =
|
||||||
self.len() as u8 - before_load_dict_idx as u8 - 1;
|
(self.len() - before_load_dict_idx) as u8 - 1;
|
||||||
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1;
|
self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
|
||||||
|
}
|
||||||
|
Splattern(..) => {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
InterpolatedPattern(parts, _) => {
|
||||||
|
println!("An interpolated pattern of {} parts", parts.len());
|
||||||
|
let mut pattern = "".to_string();
|
||||||
|
let mut words = vec![];
|
||||||
|
for (part, _) in parts {
|
||||||
|
match part {
|
||||||
|
StringPart::Word(word) => {
|
||||||
|
println!("wordpart: {word}");
|
||||||
|
words.push(word.clone());
|
||||||
|
pattern.push_str("(.*)");
|
||||||
|
}
|
||||||
|
StringPart::Data(data) => {
|
||||||
|
println!("datapart: {data}");
|
||||||
|
let data = regex::escape(data);
|
||||||
|
pattern.push_str(data.as_str());
|
||||||
|
}
|
||||||
|
StringPart::Inline(..) => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let re = Regex::new(pattern.as_str()).unwrap();
|
||||||
|
let moar_words = words.clone();
|
||||||
|
let string_pattern = StrPattern { words, re };
|
||||||
|
|
||||||
|
let pattern_idx = self.chunk.string_patterns.len();
|
||||||
|
self.chunk.string_patterns.push(string_pattern);
|
||||||
|
|
||||||
|
self.emit_op(Op::MatchString);
|
||||||
|
self.emit_byte(pattern_idx);
|
||||||
|
|
||||||
|
self.emit_op(Op::JumpIfNoMatch);
|
||||||
|
let jnm_idx = self.len();
|
||||||
|
self.emit_byte(0xff);
|
||||||
|
|
||||||
|
self.emit_op(Op::PushStringMatches);
|
||||||
|
self.emit_byte(pattern_idx);
|
||||||
|
|
||||||
|
for word in moar_words {
|
||||||
|
let name: &'static str = std::string::String::leak(word);
|
||||||
|
let binding = Binding {
|
||||||
|
name,
|
||||||
|
depth: self.scope_depth,
|
||||||
|
stack_pos: self.stack_depth,
|
||||||
|
};
|
||||||
|
self.bindings.push(binding);
|
||||||
|
self.stack_depth += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx - 1) as u8;
|
||||||
}
|
}
|
||||||
PairPattern(_, _) => unreachable!(),
|
PairPattern(_, _) => unreachable!(),
|
||||||
Tuple(members) => {
|
Tuple(members) => {
|
||||||
|
@ -938,11 +997,11 @@ impl<'a> Compiler<'a> {
|
||||||
self.emit_op(Op::Jump);
|
self.emit_op(Op::Jump);
|
||||||
jump_idxes.push(self.len());
|
jump_idxes.push(self.len());
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
self.chunk.bytecode[jif_jump_idx] = self.len() as u8 - jif_jump_idx as u8 - 1;
|
self.chunk.bytecode[jif_jump_idx] = (self.len() - jif_jump_idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
self.emit_op(Op::PanicNoWhen);
|
self.emit_op(Op::PanicNoWhen);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
self.stack_depth += 1;
|
self.stack_depth += 1;
|
||||||
}
|
}
|
||||||
|
@ -986,12 +1045,12 @@ impl<'a> Compiler<'a> {
|
||||||
jump_idxes.push(self.len());
|
jump_idxes.push(self.len());
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
for idx in no_match_jumps {
|
for idx in no_match_jumps {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.emit_op(Op::PanicNoMatch);
|
self.emit_op(Op::PanicNoMatch);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
while self.stack_depth > stack_depth {
|
while self.stack_depth > stack_depth {
|
||||||
self.pop();
|
self.pop();
|
||||||
|
@ -1214,12 +1273,12 @@ impl<'a> Compiler<'a> {
|
||||||
let jump_idx = self.len();
|
let jump_idx = self.len();
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
for idx in tup_jump_idxes {
|
for idx in tup_jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 2;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 2;
|
||||||
}
|
}
|
||||||
for _ in 0..arity {
|
for _ in 0..arity {
|
||||||
self.emit_op(Op::Pop);
|
self.emit_op(Op::Pop);
|
||||||
}
|
}
|
||||||
self.chunk.bytecode[jump_idx] = self.len() as u8 - jump_idx as u8 - 1;
|
self.chunk.bytecode[jump_idx] = (self.len() - jump_idx) as u8 - 1;
|
||||||
self.emit_op(Op::JumpIfNoMatch);
|
self.emit_op(Op::JumpIfNoMatch);
|
||||||
let jnm_idx = self.len();
|
let jnm_idx = self.len();
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
|
@ -1240,12 +1299,12 @@ impl<'a> Compiler<'a> {
|
||||||
self.emit_op(Op::Jump);
|
self.emit_op(Op::Jump);
|
||||||
jump_idxes.push(self.len());
|
jump_idxes.push(self.len());
|
||||||
self.emit_byte(0xff);
|
self.emit_byte(0xff);
|
||||||
self.chunk.bytecode[jnm_idx] = self.len() as u8 - jnm_idx as u8;
|
self.chunk.bytecode[jnm_idx] = (self.len() - jnm_idx) as u8;
|
||||||
self.scope_depth -= 1;
|
self.scope_depth -= 1;
|
||||||
}
|
}
|
||||||
self.emit_op(Op::PanicNoMatch);
|
self.emit_op(Op::PanicNoMatch);
|
||||||
for idx in jump_idxes {
|
for idx in jump_idxes {
|
||||||
self.chunk.bytecode[idx] = self.len() as u8 - idx as u8 - 1;
|
self.chunk.bytecode[idx] = (self.len() - idx) as u8 - 1;
|
||||||
}
|
}
|
||||||
self.emit_op(Op::PopN);
|
self.emit_op(Op::PopN);
|
||||||
self.emit_byte(arity);
|
self.emit_byte(arity);
|
||||||
|
@ -1305,10 +1364,7 @@ impl<'a> Compiler<'a> {
|
||||||
Placeholder => {
|
Placeholder => {
|
||||||
self.emit_op(Op::Nothing);
|
self.emit_op(Op::Nothing);
|
||||||
}
|
}
|
||||||
Arguments(..) | Placeholder | InterpolatedPattern(..) | Splattern(..) => {
|
And | Or | Arguments(..) => unreachable!(),
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
And | Or => unreachable!(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
15
src/main.rs
15
src/main.rs
|
@ -74,11 +74,18 @@ pub fn run(src: &'static str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
env::set_var("RUST_BACKTRACE", "1");
|
// env::set_var("RUST_BACKTRACE", "1");
|
||||||
let src = "
|
let src = "
|
||||||
fn add2 (x, y) -> add (x, y)
|
let x = {
|
||||||
|
match #{:a 1, :b 2, :c 3} with {
|
||||||
add2 (_, 2) (2)
|
#{a} -> :one
|
||||||
|
#{a, b, :c 3} -> :two
|
||||||
|
#{a, b, c} -> :three
|
||||||
|
(1, 2, 3) -> :thing
|
||||||
|
(4, 5, (6, 7, a)) -> if or (true, false, false, true) then :thing_1 else :thing_2
|
||||||
|
([:a, :b, :c, [:d, [:e, (:f, :g)]]]) -> if or (true, false, false, true) then :thing_1 else :thing_2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
";
|
";
|
||||||
run(src);
|
run(src);
|
||||||
|
|
|
@ -6,7 +6,7 @@ use imbl::{HashMap, Vector};
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum LFn {
|
pub enum LFn {
|
||||||
Declared {
|
Declared {
|
||||||
name: &'static str,
|
name: &'static str,
|
||||||
|
@ -107,7 +107,7 @@ impl PartialEq for Value {
|
||||||
(List(x), List(y)) => x == y,
|
(List(x), List(y)) => x == y,
|
||||||
(Dict(x), Dict(y)) => x == y,
|
(Dict(x), Dict(y)) => x == y,
|
||||||
(Box(x), Box(y)) => std::ptr::eq(x.as_ref().as_ptr(), y.as_ref().as_ptr()),
|
(Box(x), Box(y)) => std::ptr::eq(x.as_ref().as_ptr(), y.as_ref().as_ptr()),
|
||||||
(Fn(x), Fn(y)) => x == y,
|
(Fn(x), Fn(y)) => std::ptr::eq(x, y),
|
||||||
(BaseFn(x), BaseFn(y)) => std::ptr::eq(x, y),
|
(BaseFn(x), BaseFn(y)) => std::ptr::eq(x, y),
|
||||||
(Partial(x), Partial(y)) => x == y,
|
(Partial(x), Partial(y)) => x == y,
|
||||||
_ => false,
|
_ => false,
|
||||||
|
|
37
src/vm.rs
37
src/vm.rs
|
@ -342,6 +342,43 @@ impl Vm {
|
||||||
self.matches = self.stack[idx] == self.chunk().constants[const_idx as usize];
|
self.matches = self.stack[idx] == self.chunk().constants[const_idx as usize];
|
||||||
self.ip += 2;
|
self.ip += 2;
|
||||||
}
|
}
|
||||||
|
MatchString => {
|
||||||
|
let pattern_idx = self.chunk().bytecode[self.ip + 1];
|
||||||
|
self.ip += 2;
|
||||||
|
let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1;
|
||||||
|
let scrutinee = self.stack[scrutinee_idx].clone();
|
||||||
|
self.matches = match scrutinee {
|
||||||
|
Value::String(str) => self.chunk().string_patterns[pattern_idx as usize]
|
||||||
|
.re
|
||||||
|
.is_match(str.as_str()),
|
||||||
|
Value::Interned(str) => self.chunk().string_patterns[pattern_idx as usize]
|
||||||
|
.re
|
||||||
|
.is_match(str),
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
PushStringMatches => {
|
||||||
|
let pattern_idx = self.chunk().bytecode[self.ip + 1];
|
||||||
|
self.ip += 2;
|
||||||
|
let pattern_len = self.chunk().string_patterns[pattern_idx as usize]
|
||||||
|
.words
|
||||||
|
.len();
|
||||||
|
let scrutinee_idx = self.stack.len() - self.match_depth as usize - 1;
|
||||||
|
let scrutinee = self.stack[scrutinee_idx].clone();
|
||||||
|
let scrutinee = match scrutinee {
|
||||||
|
Value::String(str) => str.as_ref().clone(),
|
||||||
|
Value::Interned(str) => str.to_string(),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let captures = self.chunk().string_patterns[pattern_idx as usize]
|
||||||
|
.re
|
||||||
|
.captures(scrutinee.as_str())
|
||||||
|
.unwrap();
|
||||||
|
for cap in 0..pattern_len {
|
||||||
|
self.push(Value::String(Rc::new(captures[cap + 1].to_string())))
|
||||||
|
}
|
||||||
|
self.match_depth += pattern_len as u8;
|
||||||
|
}
|
||||||
MatchTuple => {
|
MatchTuple => {
|
||||||
let idx = self.stack.len() - self.match_depth as usize - 1;
|
let idx = self.stack.len() - self.match_depth as usize - 1;
|
||||||
let tuple_len = self.chunk().bytecode[self.ip + 1];
|
let tuple_len = self.chunk().bytecode[self.ip + 1];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user