parse interpolated strings

This commit is contained in:
Scott Richmond 2024-12-04 18:30:03 -05:00
parent c5c1717e57
commit 62671094a0
2 changed files with 118 additions and 22 deletions

View File

@ -58,12 +58,7 @@ use crate::base::*;
pub fn main() { pub fn main() {
let src = " let src = "
fn foo { \"{} {foobar}\"
\"this is a docstring\"
() -> :foo
(_) -> :bar
}
doc (foo)
"; ";
let (tokens, lex_errs) = lexer().parse(src).into_output_errors(); let (tokens, lex_errs) = lexer().parse(src).into_output_errors();
if !lex_errs.is_empty() { if !lex_errs.is_empty() {
@ -78,13 +73,13 @@ doc (foo)
let (ast, _) = parser() let (ast, _) = parser()
.parse(Stream::from_iter(to_parse).map((0..src.len()).into(), |(t, s)| (t, s))) .parse(Stream::from_iter(to_parse).map((0..src.len()).into(), |(t, s)| (t, s)))
.unwrap(); .unwrap();
// println!("{}", ast); println!("{}", ast);
let mut ctx = base(); // let mut ctx = base();
let result = eval(&ast, &mut ctx).unwrap(); // let result = eval(&ast, &mut ctx).unwrap();
println!("{}", result); // println!("{}", result);
// struct_scalpel::print_dissection_info::<value::Value>() // struct_scalpel::print_dissection_info::<value::Value>()
// struct_scalpel::print_dissection_info::<parser::Ast>(); // struct_scalpel::print_dissection_info::<parser::Ast>();

View File

@ -34,16 +34,18 @@ impl<'src> fmt::Display for MatchClause<'src> {
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum StringPart<'src> { pub enum StringPart {
Data(&'src str), Data(String),
Word(&'src str), Word(String),
Inline(String),
} }
impl<'src> fmt::Display for StringPart<'src> { impl fmt::Display for StringPart {
fn fmt(self: &StringPart<'src>, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(self: &StringPart, f: &mut fmt::Formatter) -> fmt::Result {
let rep = match self { let rep = match self {
StringPart::Data(s) => format!("{{{s}}}"), StringPart::Word(s) => format!("{{{s}}}"),
StringPart::Word(s) => s.to_string(), StringPart::Data(s) => s.to_string(),
StringPart::Inline(s) => s.to_string(),
}; };
write!(f, "{}", rep) write!(f, "{}", rep)
} }
@ -59,7 +61,7 @@ pub enum Ast<'src> {
Keyword(&'src str), Keyword(&'src str),
Word(&'src str), Word(&'src str),
String(&'src str), String(&'src str),
Interpolated(Vec<Spanned<StringPart<'src>>>), Interpolated(Vec<Spanned<StringPart>>),
Block(Vec<Spanned<Self>>), Block(Vec<Spanned<Self>>),
If(Box<Spanned<Self>>, Box<Spanned<Self>>, Box<Spanned<Self>>), If(Box<Spanned<Self>>, Box<Spanned<Self>>, Box<Spanned<Self>>),
Tuple(Vec<Spanned<Self>>), Tuple(Vec<Spanned<Self>>),
@ -91,7 +93,7 @@ impl fmt::Display for Ast<'_> {
Ast::Interpolated(strs) => { Ast::Interpolated(strs) => {
write!( write!(
f, f,
"String: \"{}\"", "Interpolated: \"{}\"",
strs.iter() strs.iter()
.map(|(s, _)| s.to_string()) .map(|(s, _)| s.to_string())
.collect::<Vec<_>>() .collect::<Vec<_>>()
@ -251,6 +253,7 @@ pub enum Pattern<'src> {
Boolean(bool), Boolean(bool),
Number(f64), Number(f64),
String(&'src str), String(&'src str),
Interpolated(Vec<StringPart>),
Keyword(&'src str), Keyword(&'src str),
Word(&'src str), Word(&'src str),
As(&'src str, &'src str), As(&'src str, &'src str),
@ -300,10 +303,26 @@ impl fmt::Display for Pattern<'_> {
.join(", ") .join(", ")
), ),
Pattern::Pair(key, value) => write!(f, ":{} {}", key, value.0), Pattern::Pair(key, value) => write!(f, ":{} {}", key, value.0),
Pattern::Interpolated(strprts) => write!(
f,
"\"{}\"",
strprts
.iter()
.map(|part| part.to_string())
.collect::<Vec<_>>()
.join("")
),
} }
} }
} }
/// Reports whether `c` is allowed inside an interpolated word.
///
/// Accepted characters are ASCII letters and digits, plus the four
/// punctuation characters `_`, `/`, `?` and `!`. Everything else
/// (including non-ASCII alphanumerics) is rejected.
fn is_word_char(c: char) -> bool {
    match c {
        '_' | '/' | '?' | '!' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
// TODO: write this // TODO: write this
// 1. we need an enum for a return type // 1. we need an enum for a return type
// either a string part or a word part // either a string part or a word part
@ -311,8 +330,81 @@ impl fmt::Display for Pattern<'_> {
// 3. this should loop through the string and allow for escaping braces // 3. this should loop through the string and allow for escaping braces
// consider using Rust-style escapes: {{}}, rather than \{\} // consider using Rust-style escapes: {{}}, rather than \{\}
// {{{foo}}} // {{{foo}}}
// Splits the string body `s` into a sequence of spanned parts: literal text
// (`StringPart::Data`), interpolated words written as `{word}`
// (`StringPart::Word`), or a single `StringPart::Inline` when the collected
// text equals the whole input.
//
// `span` is the span of the string being parsed; its `start` seeds the first
// part's span and its `end` closes the last one.
//
// Returns `Err` with one of two messages:
//   - "unclosed brace" when `s` ends right after a `{`, or right after a `}`
//     that is not closing a word;
//   - "interpolations must only contain words" when a `{...}` section holds
//     a character that is not permitted there.
//
// NOTE(review): `start` begins as `span.start` but is later assigned `i`,
// which is an index produced by `s.char_indices()`, and part spans are built
// as `start..start + i`. That looks like it mixes offsets relative to `span`
// with offsets relative to `s` -- confirm the intended span arithmetic.
pub fn parse_string<'src>(s: &'src str) -> Vec<StringPart<'src>> { fn parse_string(s: &str, span: SimpleSpan) -> Result<Vec<Spanned<StringPart>>, String> {
vec![] let mut parts = vec![];
// Text accumulated for the part currently being built.
let mut current_part = String::new();
let mut start = span.start;
// True while scanning between an opening `{` and its closing `}`.
let mut is_word = false;
let mut chars = s.char_indices();
while let Some((i, char)) = chars.next() {
match char {
'{' => {
// A `{` while already inside `{...}` is rejected.
if is_word {
return Err("interpolations must only contain words".to_string());
};
// Look at the character after the brace to tell an escape from a word.
match chars.next() {
None => return Err("unclosed brace".to_string()),
// `{{` contributes a single literal `{` to the current text.
Some((_, '{')) => current_part.push('{'),
// Anything else starts a word. This `i` shadows the outer `i` and is
// the index of the character after the brace.
Some((i, c)) => {
// Flush pending literal text, if any, before starting the word.
if !current_part.is_empty() {
parts.push((
StringPart::Data(current_part),
SimpleSpan::new(start, start + i),
));
};
current_part = String::new();
start = i;
is_word = true;
// The first character of a word must be an ASCII lowercase letter.
if c.is_ascii_lowercase() {
current_part.push(c);
} else {
return Err("interpolations must only contain words".to_string());
}
}
}
}
'}' => {
if is_word {
// Closing brace of an interpolation: emit the collected word.
parts.push((
StringPart::Word(current_part),
SimpleSpan::new(start, start + i),
));
current_part = String::new();
start = i;
is_word = false;
} else {
// Outside a word, the character following the `}` is pushed and the
// `}` itself is not; a `}` at the very end of `s` is an error.
// NOTE(review): presumably meant as the `}}` escape, but any following
// character is accepted here (e.g. `}x` yields `x`) -- confirm.
match chars.next() {
None => return Err("unclosed brace".to_string()),
Some((_, c)) => current_part.push(c),
}
}
}
_ => {
if is_word {
// Inside `{...}` only characters accepted by `is_word_char` may follow.
if is_word_char(char) {
current_part.push(char)
} else {
return Err("interpolations must only contain words".to_string());
}
} else {
// Ordinary literal text.
current_part.push(char)
}
}
}
}
// Flush whatever remains. This push is unconditional, so the returned Vec
// always holds at least one element.
// NOTE(review): because it is unconditional, input ending in `{word}` gets
// a trailing empty `Data`, and input ending inside an unterminated `{word`
// is returned as a `Word` rather than an error -- confirm both are intended.
parts.push((
if is_word {
StringPart::Word(current_part)
} else if current_part == s.to_string() {
StringPart::Inline(current_part)
} else {
StringPart::Data(current_part)
},
SimpleSpan::new(start, span.end),
));
Ok(parts)
} }
pub fn parser<'src, I>( pub fn parser<'src, I>(
@ -448,7 +540,16 @@ where
} }
.map_with(|v, e| (v, e.span())); .map_with(|v, e| (v, e.span()));
let string = select! {Token::String(s) => Ast::String(s)}.map_with(|s, e| (s, e.span())); let string = select! {Token::String(s) => s}.try_map_with(|s, e| {
let parsed = parse_string(s, e.span());
match parsed {
Ok(parts) => match parts[0] {
(StringPart::Inline(_), _) => Ok((Ast::String(s), e.span())),
_ => Ok((Ast::Interpolated(parts), e.span())),
},
Err(msg) => Err(Rich::custom(e.span(), msg)),
}
});
let tuple = simple let tuple = simple
.clone() .clone()