use crate::spans::*;
use chumsky::prelude::*;
use std::fmt;

#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    Nil,
    Number(f64),
    Word(&'static str),
    Boolean(bool),
    Keyword(&'static str),
    String(&'static str),
    // todo: hard code these types
    Reserved(&'static str),
    Punctuation(&'static str),
    Method(&'static str),
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Token::Number(n) => write!(f, "[Number {}]", n),
            Token::Word(w) => write!(f, "[Word {}]", w),
            Token::Boolean(b) => write!(f, "[Boolean {}]", b),
            Token::Keyword(k) => write!(f, "[Keyword :{}]", k),
            Token::String(s) => write!(f, "[String {}]", s),
            Token::Reserved(r) => write!(f, "[Reserved {}]", r),
            Token::Nil => write!(f, "[nil]"),
            Token::Punctuation(p) => write!(f, "[Punctuation {}]", p),
            Token::Method(m) => write!(f, "[Method {m}]"),
        }
    }
}

pub fn lexer(
) -> impl Parser<'static, &'static str, Vec<(Token, Span)>, extra::Err<Rich<'static, char, Span>>> {
    // Optionally signed integer with an optional fractional part, parsed as f64.
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // Words start with a lowercase ASCII letter, then alphanumerics or any of */?!_.
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*/?!_"))
                .repeated(),
        )
        .to_slice();

    let reserved_or_word = word.map(|word: &str| match word {
        "true" => Token::Boolean(true),
        "false" => Token::Boolean(false),
        "nil" => Token::Nil,
        // todo: hard code these as type constructors
        "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!"
        | "recur" | "repeat" | "then" | "when" | "with" | "or" | "and" | "receive" => {
            Token::Reserved(word)
        }
        _ => Token::Word(word),
    });

    let method = just("::").ignore_then(word).map(Token::Method);

    let keyword = just(':').ignore_then(word).map(Token::Keyword);

    let escape = just('\\')
        .then(choice((
            just('\\').to('\\'),
            just('"').to('"'),
            just('n').to('\n'),
            just('t').to('\t'),
            just('r').to('\r'),
        )))
        .ignored();

    // The string token holds the raw slice between the quotes, escape sequences included.
    let string = none_of("\\\"")
        .ignored()
        .or(escape)
        .repeated()
        .to_slice()
        .map(Token::String)
        .delimited_by(just('"'), just('"'));

    // todo: hard code these as type constructors
    let punctuation = one_of(",=[]{}()>;\n_")
        .to_slice()
        .or(just("->"))
        .or(just("..."))
        .or(just("#{"))
        .or(just("${"))
        .map(Token::Punctuation);

    let token = number
        .or(reserved_or_word)
        .or(keyword)
        .or(method)
        .or(string)
        .or(punctuation);

    // Comments start with & and run to the end of the line.
    let comment = just('&')
        .ignore_then(any().and_is(just('\n').not()).repeated())
        .repeated();

    // Newlines are significant (lexed as punctuation above), so only spaces and
    // tabs count as padding.
    let ludus_ws = just(' ').or(just('\t')).repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(ludus_ws)
        .padded_by(comment)
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_lexes_nil() {
        let spanned_toks = lexer().parse("nil").into_output_errors().0.unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::Nil);
    }

    #[test]
    fn it_lexes_strings() {
        let spanned_toks = lexer()
            .parse("\"foo bar baz\"")
            .into_output_errors()
            .0
            .unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::String("foo bar baz"));
    }

    #[test]
    fn it_lexes_strings_w_escaped_quotes() {
        let spanned_toks = lexer()
            .parse("\"foo \\\"bar baz\"")
            .into_output_errors()
            .0
            .unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::String("foo \\\"bar baz"));
    }
}
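
// Illustrative usage sketch (not part of the original file): it shows the token
// stream the lexer above produces for a small input. The module name, test name,
// and input string are invented for this example; it relies only on items already
// defined in this file.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn it_lexes_a_small_binding() {
        // `let` is reserved, `foo` is a word, `=` is punctuation, `:bar` is a keyword.
        let (output, errors) = lexer().parse("let foo = :bar").into_output_errors();
        assert!(errors.is_empty());
        let tokens: Vec<Token> = output.unwrap().into_iter().map(|(tok, _)| tok).collect();
        assert_eq!(
            tokens,
            vec![
                Token::Reserved("let"),
                Token::Word("foo"),
                Token::Punctuation("="),
                Token::Keyword("bar"),
            ]
        );
    }
}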