From 60f0a1081dd3fa3bb2cfde315e0bc0ca528d1e6c Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Thu, 31 Oct 2024 16:59:26 -0400 Subject: [PATCH] break out into separate modules --- src/lexer.rs | 98 +++++++ src/main.rs | 716 +------------------------------------------------- src/parser.rs | 544 ++++++++++++++++++++++++++++++++++++++ src/spans.rs | 4 + src/value.rs | 71 +++++ src/vm.rs | 0 6 files changed, 725 insertions(+), 708 deletions(-) create mode 100644 src/lexer.rs create mode 100644 src/parser.rs create mode 100644 src/spans.rs create mode 100644 src/value.rs create mode 100644 src/vm.rs diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..e850398 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,98 @@ +use crate::spans::*; +use chumsky::prelude::*; +use std::fmt; + +#[derive(Clone, Debug, PartialEq)] +pub enum Token<'src> { + Nil, + Number(f64), + Word(&'src str), + Boolean(bool), + Keyword(&'src str), + String(&'src str), + // todo: hard code these types + Reserved(&'src str), + Punctuation(&'src str), +} + +impl<'src> fmt::Display for Token<'src> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Token::Number(n) => write!(f, "[Number {}]", n), + Token::Word(w) => write!(f, "[Word {}]", w), + Token::Boolean(b) => write!(f, "[Boolean {}]", b), + Token::Keyword(k) => write!(f, "[Keyword :{}]", k), + Token::String(s) => write!(f, "[String {}]", s), + Token::Reserved(r) => write!(f, "[Reserved {}]", r), + Token::Nil => write!(f, "[nil]"), + Token::Punctuation(p) => write!(f, "[Punctuation {}]", p), + } + } +} + +pub fn lexer<'src>( +) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { + let number = just('-') + .or_not() + .then(text::int(10).then(just('.').then(text::digits(10)).or_not())) + .to_slice() + .from_str() + .unwrapped() + .map(Token::Number); + + let word = any() + .filter(char::is_ascii_lowercase) + .then( + any() + .filter(char::is_ascii_alphanumeric) + .or(one_of("*/?!_")) + 
.repeated(), + ) + .to_slice(); + + let reserved_or_word = word.map(|word: &str| match word { + "true" => Token::Boolean(true), + "false" => Token::Boolean(false), + "nil" => Token::Nil, + // todo: hard code these as type constructors + "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!" + | "recur" | "repeat" | "then" | "when" | "with" => Token::Reserved(word), + _ => Token::Word(word), + }); + + let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword); + + let string = just('"') + .ignore_then(none_of("\"").repeated().to_slice()) + .then_ignore(just('"')) + .map(Token::String); + + // todo: hard code these as type constructors + let punctuation = one_of(",=[]{}()>;\n_") + .to_slice() + .or(just("->")) + .or(just("...")) + .or(just("#{")) + .or(just("${")) + .map(Token::Punctuation); + + let token = number + .or(reserved_or_word) + .or(keyword) + .or(string) + .or(punctuation); + + let comment = just('&') + .ignore_then(any().and_is(just('\n').not()).repeated()) + .repeated(); + + let ludus_ws = just(' ').or(just('\t')).repeated(); + + token + .map_with(|tok, e| (tok, e.span())) + .padded_by(ludus_ws) + .padded_by(comment) + .recover_with(skip_then_retry_until(any().ignored(), end())) + .repeated() + .collect() +} diff --git a/src/main.rs b/src/main.rs index 69dffcc..af2f32a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ // * [x] investigate using labels (which is behind a compiler flag, somehow) // * [ ] wire up Ariadne parsing errors // * [ ] validation -// * [ ] break this out into multiple files +// * [x] break this out into multiple files // * [ ] write a tree-walk VM // - [ ] learn how to deal with lifetimes // - [ ] with stack mechanics and refcounting @@ -25,717 +25,17 @@ // * [ ] compile this into WASM // * [ ] perf testing -use chumsky::{ - input::{Stream, ValueInput}, - prelude::*, - recursive::Recursive, -}; -use imbl::{HashMap, Vector}; -use std::fmt; +use chumsky::{input::Stream, prelude::*}; 
-pub type Span = SimpleSpan; -pub type Spanned = (T, Span); +mod spans; -#[derive(Clone, Debug, PartialEq)] -pub enum Token<'src> { - Nil, - Number(f64), - Word(&'src str), - Boolean(bool), - Keyword(&'src str), - String(&'src str), - // todo: hard code these types - Reserved(&'src str), - Punctuation(&'src str), -} +mod lexer; +use crate::lexer::*; -impl<'src> fmt::Display for Token<'src> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Token::Number(n) => write!(f, "[Number {}]", n), - Token::Word(w) => write!(f, "[Word {}]", w), - Token::Boolean(b) => write!(f, "[Boolean {}]", b), - Token::Keyword(k) => write!(f, "[Keyword :{}]", k), - Token::String(s) => write!(f, "[String {}]", s), - Token::Reserved(r) => write!(f, "[Reserved {}]", r), - Token::Nil => write!(f, "[nil]"), - Token::Punctuation(p) => write!(f, "[Punctuation {}]", p), - } - } -} +mod value; -pub fn lexer<'src>( -) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { - let number = just('-') - .or_not() - .then(text::int(10).then(just('.').then(text::digits(10)).or_not())) - .to_slice() - .from_str() - .unwrapped() - .map(Token::Number); - - let word = any() - .filter(char::is_ascii_lowercase) - .then( - any() - .filter(char::is_ascii_alphanumeric) - .or(one_of("*/?!_")) - .repeated(), - ) - .to_slice(); - - let reserved_or_word = word.map(|word: &str| match word { - "true" => Token::Boolean(true), - "false" => Token::Boolean(false), - "nil" => Token::Nil, - // todo: hard code these as type constructors - "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!" 
- | "recur" | "repeat" | "then" | "when" | "with" => Token::Reserved(word), - _ => Token::Word(word), - }); - - let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword); - - let string = just('"') - .ignore_then(none_of("\"").repeated().to_slice()) - .then_ignore(just('"')) - .map(Token::String); - - // todo: hard code these as type constructors - let punctuation = one_of(",=[]{}()>;\n_") - .to_slice() - .or(just("->")) - .or(just("...")) - .or(just("#{")) - .or(just("${")) - .map(Token::Punctuation); - - let token = number - .or(reserved_or_word) - .or(keyword) - .or(string) - .or(punctuation); - - let comment = just('&') - .ignore_then(any().and_is(just('\n').not()).repeated()) - .repeated(); - - let ludus_ws = just(' ').or(just('\t')).repeated(); - - token - .map_with(|tok, e| (tok, e.span())) - .padded_by(ludus_ws) - .padded_by(comment) - .recover_with(skip_then_retry_until(any().ignored(), end())) - .repeated() - .collect() -} - -#[derive(Clone, Debug, PartialEq)] -struct Clause<'src> { - patt: Pattern<'src>, - span: Span, - body: Ast<'src>, -} - -#[derive(Clone, Debug, PartialEq)] -struct Fn<'src> { - name: &'src str, - span: Span, - body: Vec>, -} - -#[derive(Clone, Debug, PartialEq)] -enum Value<'src> { - Nil, - Boolean(bool), - Number(f64), - Keyword(&'src str), - String(&'src str), - Tuple(Vec), // on the heap for now - List(Vector), // ref-counted, immutable, persistent - Dict(HashMap<&'src str, Self>), // ref-counted, immutable, persistent - Fn(&'src Fn<'src>), - // Set(HashSet), - // Sets are hard - // Sets require Eq - // Eq is not implemented on f64, because NaNs - // We could use ordered_float::NotNan - // Let's defer that - // We're not really using sets in Ludus - - // Other things we're not implementing yet: - // pkgs, nses, tests -} - -impl<'src> fmt::Display for Value<'src> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Value::Nil => write!(f, "nil"), - Value::Boolean(b) => write!(f, "{}", b), - 
Value::Number(n) => write!(f, "{}", n), - Value::Keyword(k) => write!(f, ":{}", k), - Value::String(s) => write!(f, "\"{}\"", s), - Value::Fn(fun) => write!(f, "fn {}", fun.name), - Value::Tuple(t) => write!( - f, - "({})", - t.iter() - .map(|x| x.to_string()) - .collect::>() - .join(", ") - ), - Value::List(l) => write!( - f, - "[{}]", - l.iter() - .map(|x| x.to_string()) - .collect::>() - .join(", ") - ), - Value::Dict(d) => write!(f, "#{{{:?}}}", d), - } - } -} - -#[derive(Clone, Debug, PartialEq)] -enum Ast<'src> { - Error, - Placeholder, - Value(Value<'src>), - Word(&'src str), - Block(Vec>), - If(Box>, Box>, Box>), - Tuple(Vec>), - Arguments(Vec>), - List(Vec>), - Pair(Value<'src>, Box>), - Dict(Vec>), - Let(Box>>, Box>), - Box(&'src str, Box>), - Synthetic(Box>, Box>, Vec>), - WhenClause(Box>, Box>), - When(Vec>), - MatchClause(Box>>, Box>), - Match(Box>, Vec>), - FnClause(Box>>, Box>), - Fn(&'src str, Vec>), - FnDeclaration(&'src str), - Panic(Box>), - Do(Vec>), - Repeat(Box>, Box>), - Loop(Box>, Vec>), - Recur(Vec>), -} - -impl<'src> fmt::Display for Ast<'src> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Ast::Error => write!(f, "Error"), - Ast::Value(v) => write!(f, "Value: {}", v), - Ast::Word(w) => write!(f, "Word: {}", w), - Ast::Block(b) => write!( - f, - "Block: <{}>", - b.iter() - .map(|(line, _)| line.to_string()) - .collect::>() - .join("\n") - ), - Ast::If(cond, then_branch, else_branch) => write!( - f, - "If: {} Then: {} Else: {}", - cond.0, then_branch.0, else_branch.0 - ), - Ast::Let(pattern, expression) => { - write!(f, "Let: {} = {}", pattern.0, expression.0) - } - Ast::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), - Ast::Dict(entries) => write!( - f, - "#{{{}}}", - entries - .iter() - .map(|(pair, _)| pair.to_string()) - .collect::>() - .join(", ") - ), - Ast::List(l) => write!( - f, - "List: [{}]", - l.iter() - .map(|(line, _)| line.to_string()) - .collect::>() - .join("\n") - ), - Ast::Tuple(t) | 
Ast::Arguments(t) => write!( - f, - "Tuple: ({})", - t.iter() - .map(|(line, _)| line.to_string()) - .collect::>() - .join("\n") - ), - Ast::Synthetic(root, first, rest) => write!( - f, - "Synth: [{}, {}, {}]", - root.0, - first.0, - rest.iter() - .map(|(term, _)| term.to_string()) - .collect::>() - .join("\n") - ), - _ => unimplemented!(), - } - } -} - -#[derive(Clone, Debug, PartialEq)] -enum Pattern<'src> { - Atom(Value<'src>), - Word(&'src str), - Placeholder, - Tuple(Vec>), - List(Vec>), - // is this the right representation for Dicts? - // Could/should this also be a Vec? - Pair(Value<'src>, Box>), - Dict(Vec>), -} - -impl<'src> fmt::Display for Pattern<'src> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Pattern::Atom(a) => write!(f, "{}", a), - Pattern::Word(w) => write!(f, "{}", w), - Pattern::Placeholder => write!(f, "_"), - Pattern::Tuple(t) => write!( - f, - "({})", - t.iter() - .map(|x| x.0.to_string()) - .collect::>() - .join(", ") - ), - Pattern::List(l) => write!( - f, - "({})", - l.iter() - .map(|x| x.0.to_string()) - .collect::>() - .join(", ") - ), - Pattern::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), - Pattern::Dict(entries) => write!( - f, - "#{{{}}}", - entries - .iter() - .map(|(pair, _)| pair.to_string()) - .collect::>() - .join(", ") - ), - } - } -} - -fn parser<'src, I>( -) -> impl Parser<'src, I, Spanned>, extra::Err, Span>>> + Clone -where - I: ValueInput<'src, Token = Token<'src>, Span = Span>, -{ - let mut expr = Recursive::declare(); - - let mut pattern = Recursive::declare(); - - let mut simple = Recursive::declare(); - - let mut nonbinding = Recursive::declare(); - - let separators = recursive(|separators| { - just(Token::Punctuation(",")) - .or(just(Token::Punctuation("\n"))) - .then(separators.clone().repeated()) - }); - - let terminators = recursive(|terminators| { - just(Token::Punctuation(";")) - .or(just(Token::Punctuation("\n"))) - .then(terminators.clone().repeated()) - }); - - let 
placeholder_pattern = - select! {Token::Punctuation("_") => Pattern::Placeholder}.map_with(|p, e| (p, e.span())); - - let word_pattern = - select! { Token::Word(w) => Pattern::Word(w) }.map_with(|w, e| (w, e.span())); - - let atom_pattern = select! { - Token::Nil => Pattern::Atom(Value::Nil), - Token::Boolean(b) => Pattern::Atom(Value::Boolean(b)), - Token::Number(n) => Pattern::Atom(Value::Number(n)), - Token::Keyword(k) => Pattern::Atom(Value::Keyword(k)), - } - .map_with(|a, e| (a, e.span())); - - let tuple_pattern = pattern - .clone() - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) - .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())) - .labelled("tuple pattern"); - - let list_pattern = pattern - .clone() - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) - .map_with(|list, e| (Pattern::List(list), e.span())); - - let pair_pattern = select! {Token::Keyword(k) => Value::Keyword(k)} - .then(pattern.clone()) - .map_with(|(kw, patt), e| (Pattern::Pair(kw, Box::new(patt)), e.span())); - - let shorthand_pattern = select! {Token::Word(w) => w}.map_with(|w, e| { - ( - Pattern::Pair(Value::Keyword(w), Box::new((Pattern::Word(w), e.span()))), - e.span(), - ) - }); - - let dict_pattern = pair_pattern - .or(shorthand_pattern) - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by( - just(Token::Punctuation("#{")), - just(Token::Punctuation("}")), - ) - .map_with(|dict, e| (Pattern::Dict(dict), e.span())); - - pattern.define( - atom_pattern - .or(word_pattern) - .or(placeholder_pattern) - .or(tuple_pattern.clone()) - .or(list_pattern) - .or(dict_pattern) - .labelled("pattern"), - ); - - let placeholder = - select! 
{Token::Punctuation("_") => Ast::Placeholder}.map_with(|p, e| (p, e.span())); - - let word = select! { Token::Word(w) => Ast::Word(w) } - .map_with(|w, e| (w, e.span())) - .labelled("word"); - - let value = select! { - Token::Nil => Ast::Value(Value::Nil), - Token::Boolean(b) => Ast::Value(Value::Boolean(b)), - Token::Number(n) => Ast::Value(Value::Number(n)), - Token::String(s) => Ast::Value(Value::String(s)), - } - .map_with(|v, e| (v, e.span())); - - let keyword = select! {Token::Keyword(k) => Ast::Value(Value::Keyword(k)),} - .map_with(|k, e| (k, e.span())); - - let tuple = simple - .clone() - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) - .map_with(|tuple, e| (Ast::Tuple(tuple), e.span())); - - let args = simple - .clone() - .or(placeholder) - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) - .map_with(|args, e| (Ast::Arguments(args), e.span())); - - let synth_root = word.clone().or(keyword.clone()); - - let synth_term = keyword.clone().or(args); - - let synthetic = synth_root - .then(synth_term.clone()) - .then(synth_term.clone().repeated().collect()) - .map_with(|((root, first), rest), e| { - ( - Ast::Synthetic(Box::new(root), Box::new(first), rest), - e.span(), - ) - }); - - let list = simple - .clone() - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) - .map_with(|list, e| (Ast::List(list), e.span())); - - let pair = select! {Token::Keyword(k) => Value::Keyword(k)} - .then(simple.clone()) - .map_with(|(kw, expr), e| (Ast::Pair(kw, Box::new(expr)), e.span())); - - let shorthand = select! 
{Token::Word(w) => w}.map_with(|w, e| { - ( - Ast::Pair(Value::Keyword(w), Box::new((Ast::Word(w), e.span()))), - e.span(), - ) - }); - - let dict = pair - .or(shorthand) - .separated_by(separators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by( - just(Token::Punctuation("#{")), - just(Token::Punctuation("}")), - ) - .map_with(|dict, e| (Ast::Dict(dict), e.span())); - - simple.define( - synthetic - .or(word) - .or(keyword) - .or(value) - .or(tuple) - .or(list) - .or(dict) - .labelled("simple expression"), - ); - - let block = expr - .clone() - .separated_by(terminators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))) - .map_with(|block, e| (Ast::Block(block), e.span())) - .recover_with(via_parser(nested_delimiters( - Token::Punctuation("{"), - Token::Punctuation("}"), - [ - (Token::Punctuation("("), Token::Punctuation(")")), - (Token::Punctuation("["), Token::Punctuation("]")), - ], - |span| (Ast::Error, span), - ))); - - let if_ = just(Token::Reserved("if")) - .ignore_then(simple.clone()) - .then_ignore(just(Token::Reserved("then"))) - .then(expr.clone()) - .then_ignore(just(Token::Reserved("else"))) - .then(expr.clone()) - .map_with(|((condition, then_branch), else_branch), e| { - ( - Ast::If( - Box::new(condition), - Box::new(then_branch), - Box::new(else_branch), - ), - e.span(), - ) - }); - - let when_clause = simple - .clone() - .then_ignore(just(Token::Punctuation("->"))) - .then(expr.clone()) - .map_with(|(cond, body), e| (Ast::WhenClause(Box::new(cond), Box::new(body)), e.span())); - - let when = just(Token::Reserved("when")) - .ignore_then( - when_clause - .separated_by(terminators.clone()) - .allow_trailing() - .allow_leading() - .collect() - .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), - ) - .map_with(|clauses, e| (Ast::When(clauses), e.span())); - - let match_clause = pattern - .clone() - 
.then_ignore(just(Token::Punctuation("->"))) - .then(expr.clone()) - .map_with(|(patt, body), e| (Ast::MatchClause(Box::new(patt), Box::new(body)), e.span())); - - let match_ = just(Token::Reserved("match")) - .ignore_then(simple.clone()) - .then_ignore(just(Token::Reserved("with"))) - .then( - match_clause - .clone() - .separated_by(terminators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), - ) - .map_with(|(expr, clauses), e| (Ast::Match(Box::new(expr), clauses), e.span())); - - let conditional = when.or(if_).or(match_); - - //todo: - // * [x] do - // * [ ] loop - // * [ ] repeat - // * [x] panic! - - let panic = just(Token::Reserved("panic!")) - .ignore_then(nonbinding.clone()) - .map_with(|expr, e| (Ast::Panic(Box::new(expr)), e.span())); - - let do_ = just(Token::Reserved("do")) - .ignore_then( - nonbinding - .clone() - .separated_by( - just(Token::Punctuation(">")).then(just(Token::Punctuation("\n")).repeated()), - ) - .collect(), - ) - .map_with(|exprs, e| (Ast::Do(exprs), e.span())); - - let repeat = just(Token::Reserved("repeat")) - .ignore_then(simple.clone()) - .then(block.clone()) - .map_with(|(count, body), e| (Ast::Repeat(Box::new(count), Box::new(body)), e.span())); - - let fn_clause = tuple_pattern - .clone() - .then_ignore(just(Token::Punctuation("->"))) - .then(nonbinding.clone()) - .map_with(|(pattern, body), e| (Ast::FnClause(Box::new(pattern), Box::new(body)), e.span())) - .labelled("function clause"); - - let lambda = just(Token::Reserved("fn")) - .ignore_then(fn_clause.clone()) - .map_with(|clause, e| (Ast::Fn("anonymous", vec![clause]), e.span())); - - nonbinding.define( - simple - .clone() - .or(conditional) - .or(block) - .or(lambda) - .or(panic) - .or(do_) - .or(repeat) - .labelled("nonbinding expression"), - ); - - let let_ = just(Token::Reserved("let")) - .ignore_then(pattern.clone()) - .then_ignore(just(Token::Punctuation("="))) - 
.then(nonbinding.clone()) - .map_with(|(pattern, expression), e| { - (Ast::Let(Box::new(pattern), Box::new(expression)), e.span()) - }); - - let box_ = just(Token::Reserved("box")) - .ignore_then(word.clone()) - .then_ignore(just(Token::Punctuation("="))) - .then(nonbinding.clone()) - .map_with(|(word, expr), e| { - let name = if let Ast::Word(w) = word.0 { - w - } else { - unreachable!() - }; - (Ast::Box(name, Box::new(expr)), e.span()) - }); - - let fn_decl = just(Token::Reserved("fn")) - .ignore_then(word.clone()) - .map_with(|(word, _), e| { - let name = if let Ast::Word(w) = word { - w - } else { - unreachable!() - }; - (Ast::FnDeclaration(name), e.span()) - }); - - // let tuple_pattern = pattern - // .clone() - // .separated_by(separators.clone()) - // .allow_leading() - // .allow_trailing() - // .collect() - // .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) - // .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())); - - let fn_named = just(Token::Reserved("fn")) - .ignore_then(word.clone()) - .then(fn_clause.clone()) - .map_with(|(word, clause), e| { - let name = if let Ast::Word(word) = word.0 { - word - } else { - unreachable!() - }; - (Ast::Fn(name, vec![clause]), e.span()) - }); - - let fn_compound = just(Token::Reserved("fn")) - .ignore_then(word.clone()) - .then( - fn_clause - .clone() - .separated_by(terminators.clone()) - .allow_leading() - .allow_trailing() - .collect() - .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), - ) - .map_with(|(word, clauses), e| { - let name = if let Ast::Word(word) = word.0 { - word - } else { - unreachable!() - }; - (Ast::Fn(name, clauses), e.span()) - }); - - let fn_ = fn_named.or(fn_compound).or(fn_decl); - - let binding = let_.or(box_).or(fn_); - - expr.define(binding.or(nonbinding)); - - let script = expr - .separated_by(terminators.clone()) - .allow_trailing() - .allow_leading() - .collect() - .map_with(|exprs, e| (Ast::Block(exprs), e.span())); - - script 
-} +mod parser; +use crate::parser::*; pub fn main() { let src = "let #{a, :b b} = foo\na(b(c),d)"; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..b1896ac --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,544 @@ +use crate::lexer::*; +use crate::spans::*; +use crate::value::*; +use chumsky::{input::ValueInput, prelude::*, recursive::Recursive}; +use std::fmt; + +#[derive(Clone, Debug, PartialEq)] +pub enum Ast<'src> { + Error, + Placeholder, + Value(Value<'src>), + Word(&'src str), + Block(Vec>), + If(Box>, Box>, Box>), + Tuple(Vec>), + Arguments(Vec>), + List(Vec>), + Pair(Value<'src>, Box>), + Dict(Vec>), + Let(Box>>, Box>), + Box(&'src str, Box>), + Synthetic(Box>, Box>, Vec>), + WhenClause(Box>, Box>), + When(Vec>), + MatchClause(Box>>, Box>), + Match(Box>, Vec>), + FnClause(Box>>, Box>), + Fn(&'src str, Vec>), + FnDeclaration(&'src str), + Panic(Box>), + Do(Vec>), + Repeat(Box>, Box>), + Loop(Box>, Vec>), + Recur(Vec>), +} + +impl<'src> fmt::Display for Ast<'src> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Ast::Error => write!(f, "Error"), + Ast::Value(v) => write!(f, "Value: {}", v), + Ast::Word(w) => write!(f, "Word: {}", w), + Ast::Block(b) => write!( + f, + "Block: <{}>", + b.iter() + .map(|(line, _)| line.to_string()) + .collect::>() + .join("\n") + ), + Ast::If(cond, then_branch, else_branch) => write!( + f, + "If: {} Then: {} Else: {}", + cond.0, then_branch.0, else_branch.0 + ), + Ast::Let(pattern, expression) => { + write!(f, "Let: {} = {}", pattern.0, expression.0) + } + Ast::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), + Ast::Dict(entries) => write!( + f, + "#{{{}}}", + entries + .iter() + .map(|(pair, _)| pair.to_string()) + .collect::>() + .join(", ") + ), + Ast::List(l) => write!( + f, + "List: [{}]", + l.iter() + .map(|(line, _)| line.to_string()) + .collect::>() + .join("\n") + ), + Ast::Tuple(t) | Ast::Arguments(t) => write!( + f, + "Tuple: ({})", + t.iter() + .map(|(line, _)| 
line.to_string()) + .collect::>() + .join("\n") + ), + Ast::Synthetic(root, first, rest) => write!( + f, + "Synth: [{}, {}, {}]", + root.0, + first.0, + rest.iter() + .map(|(term, _)| term.to_string()) + .collect::>() + .join("\n") + ), + _ => unimplemented!(), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Pattern<'src> { + Atom(Value<'src>), + Word(&'src str), + Placeholder, + Tuple(Vec>), + List(Vec>), + // is this the right representation for Dicts? + // Could/should this also be a Vec? + Pair(Value<'src>, Box>), + Dict(Vec>), +} + +impl<'src> fmt::Display for Pattern<'src> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Pattern::Atom(a) => write!(f, "{}", a), + Pattern::Word(w) => write!(f, "{}", w), + Pattern::Placeholder => write!(f, "_"), + Pattern::Tuple(t) => write!( + f, + "({})", + t.iter() + .map(|x| x.0.to_string()) + .collect::>() + .join(", ") + ), + Pattern::List(l) => write!( + f, + "({})", + l.iter() + .map(|x| x.0.to_string()) + .collect::>() + .join(", ") + ), + Pattern::Pair(kw, expr) => write!(f, "{} {}", kw, expr.0), + Pattern::Dict(entries) => write!( + f, + "#{{{}}}", + entries + .iter() + .map(|(pair, _)| pair.to_string()) + .collect::>() + .join(", ") + ), + } + } +} + +pub fn parser<'src, I>( +) -> impl Parser<'src, I, Spanned>, extra::Err, Span>>> + Clone +where + I: ValueInput<'src, Token = Token<'src>, Span = Span>, +{ + let mut expr = Recursive::declare(); + + let mut pattern = Recursive::declare(); + + let mut simple = Recursive::declare(); + + let mut nonbinding = Recursive::declare(); + + let separators = recursive(|separators| { + just(Token::Punctuation(",")) + .or(just(Token::Punctuation("\n"))) + .then(separators.clone().repeated()) + }); + + let terminators = recursive(|terminators| { + just(Token::Punctuation(";")) + .or(just(Token::Punctuation("\n"))) + .then(terminators.clone().repeated()) + }); + + let placeholder_pattern = + select! 
{Token::Punctuation("_") => Pattern::Placeholder}.map_with(|p, e| (p, e.span())); + + let word_pattern = + select! { Token::Word(w) => Pattern::Word(w) }.map_with(|w, e| (w, e.span())); + + let atom_pattern = select! { + Token::Nil => Pattern::Atom(Value::Nil), + Token::Boolean(b) => Pattern::Atom(Value::Boolean(b)), + Token::Number(n) => Pattern::Atom(Value::Number(n)), + Token::Keyword(k) => Pattern::Atom(Value::Keyword(k)), + } + .map_with(|a, e| (a, e.span())); + + let tuple_pattern = pattern + .clone() + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) + .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())) + .labelled("tuple pattern"); + + let list_pattern = pattern + .clone() + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) + .map_with(|list, e| (Pattern::List(list), e.span())); + + let pair_pattern = select! {Token::Keyword(k) => Value::Keyword(k)} + .then(pattern.clone()) + .map_with(|(kw, patt), e| (Pattern::Pair(kw, Box::new(patt)), e.span())); + + let shorthand_pattern = select! {Token::Word(w) => w}.map_with(|w, e| { + ( + Pattern::Pair(Value::Keyword(w), Box::new((Pattern::Word(w), e.span()))), + e.span(), + ) + }); + + let dict_pattern = pair_pattern + .or(shorthand_pattern) + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by( + just(Token::Punctuation("#{")), + just(Token::Punctuation("}")), + ) + .map_with(|dict, e| (Pattern::Dict(dict), e.span())); + + pattern.define( + atom_pattern + .or(word_pattern) + .or(placeholder_pattern) + .or(tuple_pattern.clone()) + .or(list_pattern) + .or(dict_pattern) + .labelled("pattern"), + ); + + let placeholder = + select! {Token::Punctuation("_") => Ast::Placeholder}.map_with(|p, e| (p, e.span())); + + let word = select! 
{ Token::Word(w) => Ast::Word(w) } + .map_with(|w, e| (w, e.span())) + .labelled("word"); + + let value = select! { + Token::Nil => Ast::Value(Value::Nil), + Token::Boolean(b) => Ast::Value(Value::Boolean(b)), + Token::Number(n) => Ast::Value(Value::Number(n)), + Token::String(s) => Ast::Value(Value::String(s)), + } + .map_with(|v, e| (v, e.span())); + + let keyword = select! {Token::Keyword(k) => Ast::Value(Value::Keyword(k)),} + .map_with(|k, e| (k, e.span())); + + let tuple = simple + .clone() + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) + .map_with(|tuple, e| (Ast::Tuple(tuple), e.span())); + + let args = simple + .clone() + .or(placeholder) + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) + .map_with(|args, e| (Ast::Arguments(args), e.span())); + + let synth_root = word.clone().or(keyword.clone()); + + let synth_term = keyword.clone().or(args); + + let synthetic = synth_root + .then(synth_term.clone()) + .then(synth_term.clone().repeated().collect()) + .map_with(|((root, first), rest), e| { + ( + Ast::Synthetic(Box::new(root), Box::new(first), rest), + e.span(), + ) + }); + + let list = simple + .clone() + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("[")), just(Token::Punctuation("]"))) + .map_with(|list, e| (Ast::List(list), e.span())); + + let pair = select! {Token::Keyword(k) => Value::Keyword(k)} + .then(simple.clone()) + .map_with(|(kw, expr), e| (Ast::Pair(kw, Box::new(expr)), e.span())); + + let shorthand = select! 
{Token::Word(w) => w}.map_with(|w, e| { + ( + Ast::Pair(Value::Keyword(w), Box::new((Ast::Word(w), e.span()))), + e.span(), + ) + }); + + let dict = pair + .or(shorthand) + .separated_by(separators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by( + just(Token::Punctuation("#{")), + just(Token::Punctuation("}")), + ) + .map_with(|dict, e| (Ast::Dict(dict), e.span())); + + simple.define( + synthetic + .or(word) + .or(keyword) + .or(value) + .or(tuple) + .or(list) + .or(dict) + .labelled("simple expression"), + ); + + let block = expr + .clone() + .separated_by(terminators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))) + .map_with(|block, e| (Ast::Block(block), e.span())) + .recover_with(via_parser(nested_delimiters( + Token::Punctuation("{"), + Token::Punctuation("}"), + [ + (Token::Punctuation("("), Token::Punctuation(")")), + (Token::Punctuation("["), Token::Punctuation("]")), + ], + |span| (Ast::Error, span), + ))); + + let if_ = just(Token::Reserved("if")) + .ignore_then(simple.clone()) + .then_ignore(just(Token::Reserved("then"))) + .then(expr.clone()) + .then_ignore(just(Token::Reserved("else"))) + .then(expr.clone()) + .map_with(|((condition, then_branch), else_branch), e| { + ( + Ast::If( + Box::new(condition), + Box::new(then_branch), + Box::new(else_branch), + ), + e.span(), + ) + }); + + let when_clause = simple + .clone() + .then_ignore(just(Token::Punctuation("->"))) + .then(expr.clone()) + .map_with(|(cond, body), e| (Ast::WhenClause(Box::new(cond), Box::new(body)), e.span())); + + let when = just(Token::Reserved("when")) + .ignore_then( + when_clause + .separated_by(terminators.clone()) + .allow_trailing() + .allow_leading() + .collect() + .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), + ) + .map_with(|clauses, e| (Ast::When(clauses), e.span())); + + let match_clause = pattern + .clone() + 
.then_ignore(just(Token::Punctuation("->"))) + .then(expr.clone()) + .map_with(|(patt, body), e| (Ast::MatchClause(Box::new(patt), Box::new(body)), e.span())); + + let match_ = just(Token::Reserved("match")) + .ignore_then(simple.clone()) + .then_ignore(just(Token::Reserved("with"))) + .then( + match_clause + .clone() + .separated_by(terminators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), + ) + .map_with(|(expr, clauses), e| (Ast::Match(Box::new(expr), clauses), e.span())); + + let conditional = when.or(if_).or(match_); + + //todo: + // * [x] do + // * [ ] loop + // * [ ] repeat + // * [x] panic! + + let panic = just(Token::Reserved("panic!")) + .ignore_then(nonbinding.clone()) + .map_with(|expr, e| (Ast::Panic(Box::new(expr)), e.span())); + + let do_ = just(Token::Reserved("do")) + .ignore_then( + nonbinding + .clone() + .separated_by( + just(Token::Punctuation(">")).then(just(Token::Punctuation("\n")).repeated()), + ) + .collect(), + ) + .map_with(|exprs, e| (Ast::Do(exprs), e.span())); + + let repeat = just(Token::Reserved("repeat")) + .ignore_then(simple.clone()) + .then(block.clone()) + .map_with(|(count, body), e| (Ast::Repeat(Box::new(count), Box::new(body)), e.span())); + + let fn_clause = tuple_pattern + .clone() + .then_ignore(just(Token::Punctuation("->"))) + .then(nonbinding.clone()) + .map_with(|(pattern, body), e| (Ast::FnClause(Box::new(pattern), Box::new(body)), e.span())) + .labelled("function clause"); + + let lambda = just(Token::Reserved("fn")) + .ignore_then(fn_clause.clone()) + .map_with(|clause, e| (Ast::Fn("anonymous", vec![clause]), e.span())); + + nonbinding.define( + simple + .clone() + .or(conditional) + .or(block) + .or(lambda) + .or(panic) + .or(do_) + .or(repeat) + .labelled("nonbinding expression"), + ); + + let let_ = just(Token::Reserved("let")) + .ignore_then(pattern.clone()) + .then_ignore(just(Token::Punctuation("="))) + 
.then(nonbinding.clone()) + .map_with(|(pattern, expression), e| { + (Ast::Let(Box::new(pattern), Box::new(expression)), e.span()) + }); + + let box_ = just(Token::Reserved("box")) + .ignore_then(word.clone()) + .then_ignore(just(Token::Punctuation("="))) + .then(nonbinding.clone()) + .map_with(|(word, expr), e| { + let name = if let Ast::Word(w) = word.0 { + w + } else { + unreachable!() + }; + (Ast::Box(name, Box::new(expr)), e.span()) + }); + + let fn_decl = just(Token::Reserved("fn")) + .ignore_then(word.clone()) + .map_with(|(word, _), e| { + let name = if let Ast::Word(w) = word { + w + } else { + unreachable!() + }; + (Ast::FnDeclaration(name), e.span()) + }); + + // let tuple_pattern = pattern + // .clone() + // .separated_by(separators.clone()) + // .allow_leading() + // .allow_trailing() + // .collect() + // .delimited_by(just(Token::Punctuation("(")), just(Token::Punctuation(")"))) + // .map_with(|tuple, e| (Pattern::Tuple(tuple), e.span())); + + let fn_named = just(Token::Reserved("fn")) + .ignore_then(word.clone()) + .then(fn_clause.clone()) + .map_with(|(word, clause), e| { + let name = if let Ast::Word(word) = word.0 { + word + } else { + unreachable!() + }; + (Ast::Fn(name, vec![clause]), e.span()) + }); + + let fn_compound = just(Token::Reserved("fn")) + .ignore_then(word.clone()) + .then( + fn_clause + .clone() + .separated_by(terminators.clone()) + .allow_leading() + .allow_trailing() + .collect() + .delimited_by(just(Token::Punctuation("{")), just(Token::Punctuation("}"))), + ) + .map_with(|(word, clauses), e| { + let name = if let Ast::Word(word) = word.0 { + word + } else { + unreachable!() + }; + (Ast::Fn(name, clauses), e.span()) + }); + + let fn_ = fn_named.or(fn_compound).or(fn_decl); + + let binding = let_.or(box_).or(fn_); + + expr.define(binding.or(nonbinding)); + + let script = expr + .separated_by(terminators.clone()) + .allow_trailing() + .allow_leading() + .collect() + .map_with(|exprs, e| (Ast::Block(exprs), e.span())); + + script 
+}
diff --git a/src/spans.rs b/src/spans.rs
new file mode 100644
index 0000000..04bd4c3
--- /dev/null
+++ b/src/spans.rs
@@ -0,0 +1,7 @@
+use chumsky::prelude::*;
+
+/// Source span attached to tokens and AST nodes; an alias for chumsky's `SimpleSpan`.
+pub type Span = SimpleSpan;
+
+/// A value of type `T` paired with its source span.
+pub type Spanned<T> = (T, Span);
diff --git a/src/value.rs b/src/value.rs
new file mode 100644
index 0000000..ac8b3a0
--- /dev/null
+++ b/src/value.rs
@@ -0,0 +1,79 @@
+use crate::parser::*;
+use crate::spans::*;
+use imbl::*;
+use std::fmt;
+
+/// A single function clause: its pattern, source span, and body.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Clause<'src> {
+    patt: Pattern<'src>,
+    span: Span,
+    body: Ast<'src>,
+}
+
+/// A named function: its name, source span, and list of clauses.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Fn<'src> {
+    name: &'src str,
+    span: Span,
+    body: Vec<Clause<'src>>,
+}
+
+/// A Ludus value.
+///
+/// String-like variants borrow from the source text (`'src`); the parser also
+/// uses this type, e.g. `Value::Keyword` for dict-pair keys.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Value<'src> {
+    Nil,
+    Boolean(bool),
+    Number(f64),
+    Keyword(&'src str),
+    String(&'src str),
+    Tuple(Vec<Self>),               // on the heap for now
+    List(Vector<Self>),             // ref-counted, immutable, persistent
+    Dict(HashMap<&'src str, Self>), // ref-counted, immutable, persistent
+    Fn(&'src Fn<'src>),
+    // Set(HashSet<Self>),
+    // Sets are hard
+    // Sets require Eq
+    // Eq is not implemented on f64, because NaNs
+    // We could use ordered_float::NotNan
+    // Let's defer that
+    // We're not really using sets in Ludus
+
+    // Other things we're not implementing yet:
+    // pkgs, nses, tests
+}
+
+/// Renders each item with its `Display` impl and joins the results with `", "`.
+fn join_displayed<T: fmt::Display>(items: impl Iterator<Item = T>) -> String {
+    items
+        .map(|item| item.to_string())
+        .collect::<Vec<_>>()
+        .join(", ")
+}
+
+impl<'src> fmt::Display for Value<'src> {
+    /// Writes the value in Ludus-like notation: `nil`, `:keyword`, `"string"`,
+    /// `(tuple)`, `[list]`, `#{:key value}` and `fn name`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Value::Nil => write!(f, "nil"),
+            Value::Boolean(b) => write!(f, "{}", b),
+            Value::Number(n) => write!(f, "{}", n),
+            Value::Keyword(k) => write!(f, ":{}", k),
+            Value::String(s) => write!(f, "\"{}\"", s),
+            Value::Fn(fun) => write!(f, "fn {}", fun.name),
+            Value::Tuple(t) => write!(f, "({})", join_displayed(t.iter())),
+            Value::List(l) => write!(f, "[{}]", join_displayed(l.iter())),
+            // Render entries with `Display` like the other collections, instead of
+            // dumping the map's `Debug` output; keys are shown in keyword form.
+            Value::Dict(d) => write!(
+                f,
+                "#{{{}}}",
+                join_displayed(d.iter().map(|(k, v)| format!(":{} {}", k, v)))
+            ),
+        }
+    }
+}
diff --git a/src/vm.rs b/src/vm.rs
new file mode
100644 index 0000000..e69de29