use crate::spans::*; use chumsky::prelude::*; use std::fmt; #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { Nil, Number(f64), Word(&'src str), Boolean(bool), Keyword(&'src str), String(&'src str), // todo: hard code these types Reserved(&'src str), Punctuation(&'src str), } impl fmt::Display for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Token::Number(n) => write!(f, "[Number {}]", n), Token::Word(w) => write!(f, "[Word {}]", w), Token::Boolean(b) => write!(f, "[Boolean {}]", b), Token::Keyword(k) => write!(f, "[Keyword :{}]", k), Token::String(s) => write!(f, "[String {}]", s), Token::Reserved(r) => write!(f, "[Reserved {}]", r), Token::Nil => write!(f, "[nil]"), Token::Punctuation(p) => write!(f, "[Punctuation {}]", p), } } } pub fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { let number = just('-') .or_not() .then(text::int(10).then(just('.').then(text::digits(10)).or_not())) .to_slice() .from_str() .unwrapped() .map(Token::Number); let word = any() .filter(char::is_ascii_lowercase) .then( any() .filter(char::is_ascii_alphanumeric) .or(one_of("*/?!_")) .repeated(), ) .to_slice(); let reserved_or_word = word.map(|word: &str| match word { "true" => Token::Boolean(true), "false" => Token::Boolean(false), "nil" => Token::Nil, // todo: hard code these as type constructors "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!" | "recur" | "repeat" | "then" | "when" | "with" => Token::Reserved(word), _ => Token::Word(word), }); let keyword = just(':').ignore_then(word).map(Token::Keyword); let string = just('"') .ignore_then(none_of("\"").repeated().to_slice()) .then_ignore(just('"')) .map(Token::String); // todo: hard code these as type constructors let punctuation = one_of(",=[]{}()>;\n_") .to_slice() .or(just("->")) .or(just("...")) .or(just("#{")) .or(just("${")) .map(Token::Punctuation); let token = number .or(reserved_or_word) .or(keyword) .or(string) .or(punctuation); let comment = just('&') .ignore_then(any().and_is(just('\n').not()).repeated()) .repeated(); let ludus_ws = just(' ').or(just('\t')).repeated(); token .map_with(|tok, e| (tok, e.span())) .padded_by(ludus_ws) .padded_by(comment) .recover_with(skip_then_retry_until(any().ignored(), end())) .repeated() .collect() }