parse string patterns, perhaps correctly, perhaps not

This commit is contained in:
Scott Richmond 2024-12-07 18:26:25 -05:00
parent 13c14fd38f
commit 2a26316b50
2 changed files with 96 additions and 20 deletions

View File

@ -58,8 +58,7 @@ use crate::base::*;
pub fn main() {
let src = "
let foo = \" fOobArbaz \"
trimr (foo)
let \"{foo}\" = bar
";
let (tokens, lex_errs) = lexer().parse(src).into_output_errors();
if !lex_errs.is_empty() {
@ -74,13 +73,13 @@ trimr (foo)
let (ast, _) = parser()
.parse(Stream::from_iter(to_parse).map((0..src.len()).into(), |(t, s)| (t, s)))
.unwrap();
// println!("{}", ast);
println!("{}", ast);
let mut ctx = base();
// let mut ctx = base();
let result = eval(&ast, &mut ctx).unwrap();
// let result = eval(&ast, &mut ctx).unwrap();
println!("{}", result);
// println!("{}", result);
// struct_scalpel::print_dissection_info::<value::Value>()
// struct_scalpel::print_dissection_info::<parser::Ast>();

View File

@ -247,13 +247,39 @@ impl<'src> fmt::Display for PairPattern<'src> {
}
}
/// A compiled string pattern.
///
/// Wraps a boxed closure that is handed the scrutinee string and returns either `None`
/// or `Some` list of `(String, String)` pairs.
pub struct StringMatcher<'src>(Box<dyn Fn(String) -> Option<Vec<(String, String)>> + 'src>);

/// Equality is deliberately trivial: the boxed closure cannot be compared, so every
/// matcher is reported equal to every other matcher.
impl<'src> PartialEq for StringMatcher<'src> {
    fn eq(&self, _: &Self) -> bool {
        true
    }
}

/// The boxed closure cannot be duplicated, so this impl exists only to satisfy a
/// `Clone` bound.
///
/// # Panics
///
/// Always panics when called.
// NOTE(review): any code path that clones a value holding a matcher will hit this panic.
impl<'src> Clone for StringMatcher<'src> {
    fn clone(&self) -> Self {
        unreachable!()
    }
}

/// Prints a fixed placeholder, since the closure has no meaningful textual form.
impl<'src> fmt::Display for StringMatcher<'src> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("string matcher")
    }
}

/// Prints the same fixed placeholder as the `Display` impl.
impl<'src> fmt::Debug for StringMatcher<'src> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern<'src> {
Nil,
Boolean(bool),
Number(f64),
String(&'src str),
Interpolated(Vec<StringPart>),
Interpolated(Vec<Spanned<StringPart>>, StringMatcher<'src>),
Keyword(&'src str),
Word(&'src str),
As(&'src str, &'src str),
@ -303,12 +329,12 @@ impl fmt::Display for Pattern<'_> {
.join(", ")
),
Pattern::Pair(key, value) => write!(f, ":{} {}", key, value.0),
Pattern::Interpolated(strprts) => write!(
Pattern::Interpolated(strprts, _) => write!(
f,
"\"{}\"",
"interpolated: \"{}\"",
strprts
.iter()
.map(|part| part.to_string())
.map(|part| part.0.to_string())
.collect::<Vec<_>>()
.join("")
),
@ -323,13 +349,6 @@ fn is_word_char(c: char) -> bool {
matches!(c, '_' | '/' | '?' | '!')
}
// TODO: write this
// 1. we need an enum for a return type
// either a string part or a word part
// 2. our string types, both patterns and values, now contains a vec of nodes
// 3. this should loop through the string and allow for escaping braces
// consider using Rust-style escapes: {{}}, rather than \{\}
// {{{foo}}}
fn parse_string(s: &str, span: SimpleSpan) -> Result<Vec<Spanned<StringPart>>, String> {
let mut parts = vec![];
let mut current_part = String::new();
@ -396,7 +415,8 @@ fn parse_string(s: &str, span: SimpleSpan) -> Result<Vec<Spanned<StringPart>>, S
parts.push((
if is_word {
StringPart::Word(current_part)
let part = current_part.clone();
StringPart::Word(part)
} else if current_part == s {
StringPart::Inline(current_part)
} else {
@ -407,6 +427,50 @@ fn parse_string(s: &str, span: SimpleSpan) -> Result<Vec<Spanned<StringPart>>, S
Ok(parts)
}
/// Compiles the parts of an interpolated string pattern into a [`StringMatcher`].
///
/// The returned matcher walks `parts` from left to right over the scrutinee:
///
/// * a `StringPart::Data` literal must occur exactly at the current position;
/// * a `StringPart::Word` captures the text from the current position up to the first
///   occurrence of the literal that follows it, or the whole remainder of the scrutinee
///   when the word is the last part.
///
/// The matcher returns `None` when a literal cannot be found, otherwise
/// `Some(bindings)` with one `(word, captured text)` pair per word, in pattern order.
///
/// # Panics
///
/// The matcher panics when a word is directly followed by something other than a
/// literal, or when a part is neither `Data` nor `Word`.
// NOTE(review): text remaining after the final literal is not checked, so a pattern
// ending in a literal also accepts scrutinees with trailing characters. Confirm whether
// patterns are meant to be anchored at the end.
pub fn compile_string_pattern<'src>(parts: Vec<Spanned<StringPart>>) -> StringMatcher<'src> {
    StringMatcher(Box::new(move |scrutinee| {
        // Byte offset into `scrutinee` just past everything consumed so far.
        let mut last_match = 0;
        let mut parts_iter = parts.iter();
        let mut matches = vec![];
        // `while let` rather than `for`: the word arm pulls the following literal from
        // the same iterator.
        while let Some((part, _)) = parts_iter.next() {
            match part {
                StringPart::Data(string) => {
                    // A literal reached here is not preceded by a word, so it has to sit
                    // exactly at the current position. A literal found elsewhere in the
                    // scrutinee is a failed match, not an internal error.
                    if !scrutinee[last_match..].starts_with(string.as_str()) {
                        return None;
                    }
                    last_match += string.len();
                }
                StringPart::Word(word) => {
                    let to_test = &scrutinee[last_match..];
                    match parts_iter.next() {
                        // Trailing word: it captures everything that is left.
                        None => matches.push((word.clone(), to_test.to_string())),
                        Some((StringPart::Data(literal), _)) => {
                            let Some(i) = to_test.find(literal.as_str()) else {
                                return None;
                            };
                            // The word binds the text in front of the literal.
                            matches.push((word.clone(), to_test[..i].to_string()));
                            // `i` is relative to `to_test`, so advance from the old offset.
                            last_match += i + literal.len();
                        }
                        Some(_) => unreachable!("internal Ludus error: bad string pattern"),
                    }
                }
                _ => unreachable!("internal Ludus error"),
            }
        }
        Some(matches)
    }))
}
pub fn parser<'src, I>(
) -> impl Parser<'src, I, Spanned<Ast<'src>>, extra::Err<Rich<'src, Token<'src>, Span>>> + Clone
where
@ -443,11 +507,23 @@ where
Token::Boolean(b) => Pattern::Boolean(b),
Token::Number(n) => Pattern::Number(n),
Token::Keyword(k) => Pattern::Keyword(k),
// todo: actual string patterns
Token::String(s) => Pattern::String(s)
}
.map_with(|a, e| (a, e.span()));
let string_pattern = select! {Token::String(s) => s}.try_map_with(|s, e| {
let parsed = parse_string(s, e.span());
match parsed {
Ok(parts) => match parts[0] {
(StringPart::Inline(_), _) => Ok((Pattern::String(s), e.span())),
_ => Ok((
Pattern::Interpolated(parts.clone(), compile_string_pattern(parts)),
e.span(),
)),
},
Err(msg) => Err(Rich::custom(e.span(), msg)),
}
});
let bare_splat = just(Token::Punctuation("...")).map_with(|_, e| {
(
Pattern::Splattern(Box::new((Pattern::Placeholder, e.span()))),
@ -517,6 +593,7 @@ where
pattern.define(
atom_pattern
.or(string_pattern)
.or(as_pattern)
.or(word_pattern)
.or(placeholder_pattern)