// an implementation of Ludus // curently left undone (and not adding for a while yet): // * sets // * interpolated strings & string patterns // * pkgs, namespaces, imports, `use` forms // * with forms // * test forms // * ignored words // todo: // * [ ] rewrite fn parser to use chumsky::Recursive::declare/define // - [ ] do this to extract/simplify/DRY things like tuple patterns, fn clauses, etc. // * [x] Work around chumsky::Stream::from_iter().spanned disappearing in most recent version // * [x] investigate using labels (which is behind a compiler flag, somehow) // * [ ] wire up Ariadne parsing errors // * [ ] validation // * [ ] break this out into multiple files // * [ ] write a tree-walk VM // - [ ] learn how to deal with lifetimes // - [ ] with stack mechanics and refcounting // - [ ] with tail-call optimization // * [ ] write `base` in Rust // * [ ] turn this into a library function // * [ ] compile this into WASM // * [ ] perf testing use chumsky::{ input::{Stream, ValueInput}, prelude::*, }; use imbl::{HashMap, Vector}; use std::fmt; pub type Span = SimpleSpan; pub type Spanned = (T, Span); #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { Nil, Number(f64), Word(&'src str), Boolean(bool), Keyword(&'src str), String(&'src str), // todo: hard code these types Reserved(&'src str), Punctuation(&'src str), } impl<'src> fmt::Display for Token<'src> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Token::Number(n) => write!(f, "[Number {}]", n), Token::Word(w) => write!(f, "[Word {}]", w), Token::Boolean(b) => write!(f, "[Boolean {}]", b), Token::Keyword(k) => write!(f, "[Keyword :{}]", k), Token::String(s) => write!(f, "[String {}]", s), Token::Reserved(r) => write!(f, "[Reserved {}]", r), Token::Nil => write!(f, "[nil]"), Token::Punctuation(p) => write!(f, "[Punctuation {}]", p), } } } pub fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { let number = just('-') .or_not() .then(text::int(10).then(just('.').then(text::digits(10)).or_not())) .to_slice() .from_str() .unwrapped() .map(Token::Number); let word = any() .filter(char::is_ascii_lowercase) .then( any() .filter(char::is_ascii_alphanumeric) .or(one_of("*/?!_")) .repeated(), ) .to_slice(); let reserved_or_word = word.map(|word: &str| match word { "true" => Token::Boolean(true), "false" => Token::Boolean(false), "nil" => Token::Nil, // todo: hard code these as type constructors "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!" | "recur" | "repeat" | "then" | "when" | "with" => Token::Reserved(word), _ => Token::Word(word), }); let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword); let string = just('"') .ignore_then(none_of("\"").repeated().to_slice()) .then_ignore(just('"')) .map(Token::String); // todo: hard code these as type constructors let punctuation = one_of(",=[]{}()>;\n_") .to_slice() .or(just("->")) .or(just("...")) .or(just("#{")) .or(just("${")) .map(Token::Punctuation); let token = number .or(reserved_or_word) .or(keyword) .or(string) .or(punctuation); let comment = just('&') .ignore_then(any().and_is(just('\n').not()).repeated()) .repeated(); let ludus_ws = just(' ').or(just('\t')).repeated(); token .map_with(|tok, e| (tok, e.span())) .padded_by(ludus_ws) .padded_by(comment) .recover_with(skip_then_retry_until(any().ignored(), end())) .repeated() .collect() } #[derive(Clone, Debug, PartialEq)] struct Clause<'src> { patt: Pattern<'src>, span: Span, body: Ast<'src>, } #[derive(Clone, Debug, PartialEq)] struct Fn<'src> { name: &'src str, span: Span, body: Vec>, } #[derive(Clone, Debug, PartialEq)] enum Value<'src> { Nil, Boolean(bool), Number(f64), Keyword(&'src str), String(&'src str), Tuple(Vec), // on the heap for now List(Vector), // ref-counted, immutable, persistent Dict(HashMap<&'src str, Self>), // ref-counted, immutable, persistent Fn(&'src Fn<'src>), // Set(HashSet), // Sets are hard // Sets require Eq // Eq is not implemented on f64, because NaNs // We could use ordered_float::NotNan // Let's defer that // We're not really using sets in Ludus // Other things we're not implementing yet: // pkgs, nses, tests } impl<'src> fmt::Display for Value<'src> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Value::Nil => write!(f, "nil"), Value::Boolean(b) => write!(f, "{}", b), Value::Number(n) => write!(f, "{}", n), Value::Keyword(k) => write!(f, ":{}", k), Value::String(s) => write!(f, "\"{}\"", s), Value::Fn(fun) => write!(f, "fn {}", fun.name), Value::Tuple(t) => write!( f, "({})", t.iter() .map(|x| x.to_string()) .collect::>() .join(", ") ), Value::List(l) => write!( f, "[{}]", l.iter() .map(|x| x.to_string()) .collect::>() .join(", ") ), Value::Dict(d) => write!(f, "#{{{:?}}}", d), } } } #[derive(Clone, Debug, PartialEq)] enum Ast<'src> { Error, Placeholder, Value(Value<'src>), Word(&'src str), Block(Vec>), If(Box>, Box>, Box>), Tuple(Vec>), Arguments(Vec>), List(Vec>), Pair(Value<'src>, Box>), Dict(Vec>), Let(Box>>, Box>), Box(&'src str, Box>), Synthetic(Box>, Box>, Vec>), WhenClause(Box>, Box>), When(Vec>), MatchClause(Box>>, Box>), Match(Box>, Vec>), FnClause(Box>>, Box>), Fn(&'src str, Vec>), FnDeclaration(&'src str), Panic(Box>), Do(Vec>), Repeat(Box>, Box>), Loop(Box>, Vec>), Recur(Vec>), } impl<'src> fmt::Display for Ast<'src> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Ast::Error => write!(f, "Error"), Ast::Value(v) => write!(f, "Value: {}", v), Ast::Word(w) => write!(f, "Word: {}", w), Ast::Block(b) => write!( f, "Block: <{}>", b.iter() .map(|(line, _)| line.to_string()) .collect::>() .join("\n") ), Ast::If(cond, then_branch, else_branch) => write!( f, "If: {} Then: {} Else: {}", cond.0, then_branch.0, else_branch.0 ), Ast::Let(pattern, expression) => { write!(f, "Let: {} = {}", pattern.0, expression.0) } Ast::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), Ast::Dict(entries) => write!( f, "#{{{}}}", entries .iter() .map(|(pair, _)| pair.to_string()) .collect::>() .join(", ") ), Ast::List(l) => write!( f, "List: [{}]", l.iter() .map(|(line, _)| line.to_string()) .collect::>() .join("\n") ), Ast::Tuple(t) | Ast::Arguments(t) => write!( f, "Tuple: ({})", t.iter() .map(|(line, _)| line.to_string()) .collect::>() .join("\n") ), Ast::Synthetic(root, first, rest) => write!( f, "Synth: [{}, {}, {}]", root.0, first.0, rest.iter() .map(|(term, _)| term.to_string()) .collect::>() .join("\n") ), _ => unimplemented!(), } } } #[derive(Clone, Debug, PartialEq)] enum Pattern<'src> { Atom(Value<'src>), Word(&'src str), Placeholder, Tuple(Vec>), List(Vec>), // is this the right representation for Dicts? // Could/should this also be a Vec? Pair(Value<'src>, Box>), Dict(Vec>), } impl<'src> fmt::Display for Pattern<'src> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Pattern::Atom(a) => write!(f, "{}", a), Pattern::Word(w) => write!(f, "{}", w), Pattern::Placeholder => write!(f, "_"), Pattern::Tuple(t) => write!( f, "({})", t.iter() .map(|x| x.0.to_string()) .collect::>() .join(", ") ), Pattern::List(l) => write!( f, "({})", l.iter() .map(|x| x.0.to_string()) .collect::>() .join(", ") ), Pattern::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), Pattern::Dict(entries) => write!( f, "#{{{}}}", entries .iter() .map(|(pair, _)| pair.to_string()) .collect::>() .join(", ") ), } } } fn parser<'src, I>( ) -> impl Parser<'src, I, Spanned>, extra::Err, Span>>> + Clone where I: ValueInput<'src, Token = Token<'src>, Span = Span>, { recursive(|expr| { let separators = recursive(|separators| { just(Token::Punctuation(",")) .or(just(Token::Punctuation("\n"))) .then(separators.clone().repeated()) }); let pattern = recursive(|pattern| { let placeholder = select! {Token::Punctuation("_") => Pattern::Placeholder} .map_with(|p, e| (p, e.span())); let word = select! { Token::Word(w) => Pattern::Word(w) }.map_with(|w, e| (w, e.span())); let atom = select! { Token::Nil => Pattern::Atom(Value::Nil), Token::Boolean(b) => Pattern::Atom(Value::Boolean(b)), Token::Number(n) => Pattern::Atom(Value::Number(n)), Token::Keyword(k) => Pattern::Atom(Value::Keyword(k)), } .map_with(|a, e| (a, e.span())); let tuple = pattern .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())); let list = pattern .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) .map_with(|list, e| (Pattern::List(list), e.span())); let pair = select! {Token::Keyword(k) => Value::Keyword(k)} .then(pattern.clone()) .map_with(|(kw, patt), e| (Pattern::Pair(kw, Box::new(patt)), e.span())); let shorthand = select! {Token::Word(w) => w}.map_with(|w, e| { ( Pattern::Pair(Value::Keyword(w), Box::new((Pattern::Word(w), e.span()))), e.span(), ) }); let dict = pair .or(shorthand) .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by( just(Token::Punctuation("#{")), just(Token::Punctuation("}")), ) .map_with(|dict, e| (Pattern::Dict(dict), e.span())); atom.or(word) .or(placeholder) .or(tuple) .or(list) .or(dict) .labelled("pattern") }); let placeholder = select! {Token::Punctuation("_") => Ast::Placeholder}.map_with(|p, e| (p, e.span())); let word = select! { Token::Word(w) => Ast::Word(w) } .map_with(|w, e| (w, e.span())) .labelled("word"); let simple = recursive(|simple| { let value = select! { Token::Nil => Ast::Value(Value::Nil), Token::Boolean(b) => Ast::Value(Value::Boolean(b)), Token::Number(n) => Ast::Value(Value::Number(n)), Token::String(s) => Ast::Value(Value::String(s)), } .map_with(|v, e| (v, e.span())); let keyword = select! {Token::Keyword(k) => Ast::Value(Value::Keyword(k)),} .map_with(|k, e| (k, e.span())); // let word = select! {Token::Word(w) => Ast::Word(w)}.map_with(|w, e| (w, e.span())); let tuple = simple .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) .map_with(|tuple, e| (Ast::Tuple(tuple), e.span())); // let placeholder = select! {Token::Punctuation("_") => Ast::Placeholder} // .map_with(|p, e| (p, e.span())); let args = simple .clone() .or(placeholder) .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) .map_with(|args, e| (Ast::Arguments(args), e.span())); let synth_root = word.clone().or(keyword.clone()); let synth_term = keyword.clone().or(args); let synthetic = synth_root .then(synth_term.clone()) .then(synth_term.clone().repeated().collect()) .map_with(|((root, first), rest), e| { ( Ast::Synthetic(Box::new(root), Box::new(first), rest), e.span(), ) }); let list = simple .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) .map_with(|list, e| (Ast::List(list), e.span())); let pair = select! {Token::Keyword(k) => Value::Keyword(k)} .then(simple.clone()) .map_with(|(kw, expr), e| (Ast::Pair(kw, Box::new(expr)), e.span())); let shorthand = select! {Token::Word(w) => w}.map_with(|w, e| { ( Ast::Pair(Value::Keyword(w), Box::new((Ast::Word(w), e.span()))), e.span(), ) }); let dict = pair .or(shorthand) .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by( just(Token::Punctuation("#{")), just(Token::Punctuation("}")), ) .map_with(|dict, e| (Ast::Dict(dict), e.span())); synthetic .or(word) .or(keyword) .or(value) .or(tuple) .or(list) .or(dict) .labelled("simple expression") }); let terminators = recursive(|terminators| { just(Token::Punctuation("\n")) .or(just(Token::Punctuation(";"))) .then(terminators.clone().repeated()) .labelled("terminator") }); let nonbinding = recursive(|nonbinding| { let block = expr .clone() .separated_by(terminators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))) .map_with(|block, e| (Ast::Block(block), e.span())) .recover_with(via_parser(nested_delimiters( Token::Punctuation("{"), Token::Punctuation("}"), [ (Token::Punctuation("("), Token::Punctuation(")")), (Token::Punctuation("["), Token::Punctuation("]")), ], |span| (Ast::Error, span), ))); let if_ = just(Token::Reserved("if")) .ignore_then(simple.clone()) .then_ignore(just(Token::Reserved("then"))) .then(expr.clone()) .then_ignore(just(Token::Reserved("else"))) .then(expr.clone()) .map_with(|((condition, then_branch), else_branch), e| { ( Ast::If( Box::new(condition), Box::new(then_branch), Box::new(else_branch), ), e.span(), ) }); let when_clause = simple .clone() .then_ignore(just(Token::Punctuation("->"))) .then(expr.clone()) .map_with(|(cond, body), e| { (Ast::WhenClause(Box::new(cond), Box::new(body)), e.span()) }); let when = just(Token::Reserved("when")) .ignore_then( when_clause .separated_by(terminators.clone()) .allow_trailing() .allow_leading() .collect() .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), ) .map_with(|clauses, e| (Ast::When(clauses), e.span())); let match_clause = pattern .clone() .then_ignore(just(Token::Punctuation("->"))) .then(expr.clone()) .map_with(|(patt, body), e| { (Ast::MatchClause(Box::new(patt), Box::new(body)), e.span()) }); let match_ = just(Token::Reserved("match")) .ignore_then(simple.clone()) .then_ignore(just(Token::Reserved("with"))) .then( match_clause .clone() .separated_by(terminators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), ) .map_with(|(expr, clauses), e| (Ast::Match(Box::new(expr), clauses), e.span())); let conditional = when.or(if_).or(match_); //todo: // * [x] do // * [ ] loop // * [ ] repeat // * [x] panic! let panic = just(Token::Reserved("panic!")) .ignore_then(nonbinding.clone()) .map_with(|expr, e| (Ast::Panic(Box::new(expr)), e.span())); let do_ = just(Token::Reserved("do")) .ignore_then( nonbinding .clone() .separated_by( just(Token::Punctuation(">")) .then(just(Token::Punctuation("\n")).repeated()), ) .collect(), ) .map_with(|exprs, e| (Ast::Do(exprs), e.span())); let repeat = just(Token::Reserved("repeat")) .ignore_then(simple.clone()) .then(block.clone()) .map_with(|(count, body), e| { (Ast::Repeat(Box::new(count), Box::new(body)), e.span()) }); let tuple_pattern = pattern .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())) .labelled("tuple pattern"); let fn_clause = tuple_pattern .then_ignore(just(Token::Punctuation("->"))) .then(nonbinding.clone()) .map_with(|(pattern, body), e| { (Ast::FnClause(Box::new(pattern), Box::new(body)), e.span()) }) .labelled("function clause"); let lambda = just(Token::Reserved("fn")) .ignore_then(fn_clause) .map_with(|clause, e| (Ast::Fn("anonymous", vec![clause]), e.span())); let loop_ = just(Token::Reserved("loop")); simple .clone() .or(conditional) .or(block) .or(lambda) .or(panic) .or(do_) .labelled("nonbinding expression") }); let let_ = just(Token::Reserved("let")) .ignore_then(pattern.clone()) .then_ignore(just(Token::Punctuation("="))) .then(nonbinding.clone()) .map_with(|(pattern, expression), e| { (Ast::Let(Box::new(pattern), Box::new(expression)), e.span()) }); let box_ = just(Token::Reserved("box")) .ignore_then(word.clone()) .then_ignore(just(Token::Punctuation("="))) .then(nonbinding.clone()) .map_with(|(word, expr), e| { let name = if let Ast::Word(w) = word.0 { w } else { unreachable!() }; (Ast::Box(name, Box::new(expr)), e.span()) }); let fn_decl = just(Token::Reserved("fn")) .ignore_then(word.clone()) .map_with(|(word, _), e| { let name = if let Ast::Word(w) = word { w } else { unreachable!() }; (Ast::FnDeclaration(name), e.span()) }); let tuple_pattern = pattern .clone() .separated_by(separators.clone()) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())); let fn_clause = tuple_pattern .then_ignore(just(Token::Punctuation("->"))) .then(nonbinding.clone()) .map_with(|(pattern, body), e| { (Ast::FnClause(Box::new(pattern), Box::new(body)), e.span()) }); let fn_named = just(Token::Reserved("fn")) .ignore_then(word.clone()) .then(fn_clause.clone()) .map_with(|(word, clause), e| { let name = if let Ast::Word(word) = word.0 { word } else { unreachable!() }; (Ast::Fn(name, vec![clause]), e.span()) }); let fn_compound = just(Token::Reserved("fn")) .ignore_then(word.clone()) .then( fn_clause .clone() .separated_by(terminators) .allow_leading() .allow_trailing() .collect() .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), ) .map_with(|(word, clauses), e| { let name = if let Ast::Word(word) = word.0 { word } else { unreachable!() }; (Ast::Fn(name, clauses), e.span()) }); let fn_ = fn_named.or(fn_compound).or(fn_decl); let binding = let_.or(box_).or(fn_); nonbinding.or(binding) }) } pub fn main() { let src = "let #{a, :b b} = foo"; let (tokens, lex_errs) = lexer().parse(src).into_output_errors(); if lex_errs.len() > 0 { println!("{:?}", lex_errs); return (); } let tokens = tokens.unwrap(); let to_parse = tokens.clone(); for (token, _) in tokens { println!("{}", token) } let (ast, _) = parser() .parse(Stream::from_iter(to_parse).map((0..src.len()).into(), |(t, s)| (t, s))) .unwrap(); println!("{}", ast); } // #[cfg(test)] // mod tests { // use crate::lexer; // use crate::Token; // use chumsky::prelude::*; // #[test] // fn it_lexes_positive_ints() { // let (mytoken, _) = lexer().parse("42").unwrap()[0].clone(); // assert_eq!(mytoken, Token::Number(42.0)) // } // #[test] // fn it_lexes_negative_ints() { // let (mytoken, _) = lexer().parse("-42").unwrap()[0].clone(); // assert_eq!(mytoken, Token::Number(-42.0)) // } // #[test] // fn it_lexes_positive_floats() { // let (mytoken, _) = lexer().parse("42.032").unwrap()[0].clone(); // assert_eq!(mytoken, Token::Number(42.032)) // } // #[test] // fn it_lexes_positive_decimals() { // let (mytoken, _) = lexer().parse("0.123").unwrap()[0].clone(); // assert_eq!(mytoken, Token::Number(0.123)) // } // #[test] // fn it_lexes_negative_floats() { // let mytoken = lexer().parse("-42.123").unwrap()[0].clone().0; // assert_eq!(mytoken, Token::Number(-42.123)) // } // #[test] // fn it_lexes_negative_decimals() { // let mytoken = lexer().parse("-0.123").unwrap()[0].clone().0; // assert_eq!(mytoken, Token::Number(-0.123)) // } // #[test] // fn it_lexes_bools() { // let tt = lexer().parse("true").unwrap()[0].clone().0; // assert_eq!(tt, Token::Boolean(true)); // let ff = lexer().parse("false").unwrap()[0].clone().0; // assert_eq!(ff, Token::Boolean(false)) // } // #[test] // fn it_lexes_words() { // let mytoken = lexer().parse("foo").unwrap()[0].clone().0; // assert_eq!(mytoken, Token::Word("foo")) // } // #[test] // fn it_lexes_keywords() { // let kw = lexer().parse(":foo").unwrap()[0].clone().0; // assert_eq!(kw, Token::Keyword("foo")) // } // #[test] // fn it_lexes_strings() { // let s = lexer().parse("\"foo bar baz\"").unwrap()[0].clone().0; // assert_eq!(s, Token::String("foo bar baz")) // } // #[test] // fn it_ignores_comments() { // let e = lexer().parse("foo &bar\nbaz").unwrap(); // assert_eq!(e[0].0, Token::Word("foo")); // assert_eq!(e[1].0, Token::Punctuation("\n")); // assert_eq!(e[2].0, Token::Word("baz")) // } // #[test] // fn it_lexes_multiple_tokens() { // let toks = lexer().parse("foo;bar\nbaz").unwrap(); // assert_eq!(toks[0].0, Token::Word("foo")); // assert_eq!(toks[2].0, Token::Word("bar")); // assert_eq!(toks[4].0, Token::Word("baz")) // } // #[test] // fn it_lexes_collections() { // let toks = lexer().parse("(1, 2)").unwrap(); // assert_eq!(toks[0].0, Token::Punctuation("(")); // assert_eq!(toks[1].0, Token::Number(1.0)); // assert_eq!(toks[2].0, Token::Punctuation(",")); // assert_eq!(toks[3].0, Token::Number(2.0)); // assert_eq!(toks[4].0, Token::Punctuation(")")) // } // }