use chumsky::prelude::*;
use std::fmt;

pub type Span = SimpleSpan;

#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    Number(f64),
    Word(&'src str),
    Boolean(bool),
    Keyword(&'src str),
    String(&'src str),
    Reserved(&'src str),
    Nil,
    Punctuation(&'src str),
}

impl<'src> fmt::Display for Token<'src> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Token::Number(n) => write!(f, "{}", n),
            Token::Word(w) => write!(f, "{}", w),
            Token::Boolean(b) => write!(f, "{}", b),
            Token::Keyword(k) => write!(f, ":{}", k),
            Token::String(s) => write!(f, "{}", s),
            Token::Reserved(r) => write!(f, "{}", r),
            Token::Nil => write!(f, "nil"),
            Token::Punctuation(p) => write!(f, "{}", p),
        }
    }
}

pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // Numbers: an optional leading minus, an integer part, and an optional
    // fractional part. The whole matched slice is parsed as an f64.
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // Words begin with a lowercase ASCII letter, followed by any mix of
    // ASCII alphanumerics and the characters `*/?!_`.
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*/?!_"))
                .repeated(),
        )
        .to_slice();

    // Re-tag words that are actually literals (`true`, `false`, `nil`) or
    // reserved words. `word` is cloned here so it can be reused for keywords.
    let reserved_or_word = word.clone().map(|word: &str| match word {
        "true" => Token::Boolean(true),
        "false" => Token::Boolean(false),
        "nil" => Token::Nil,
        "as" | "box" | "do" | "else" | "fn" | "if" | "import" | "let" | "loop" | "match"
        | "ns" | "panic!" | "pkg" | "recur" | "repeat" | "test" | "then" | "use" | "when"
        | "with" => Token::Reserved(word),
        _ => Token::Word(word),
    });

    // Keywords are words prefixed with a colon: `:foo`.
    let keyword = just(':').ignore_then(word).map(Token::Keyword);

    // Strings: anything between double quotes; no escape sequences.
    let string = just('"')
        .ignore_then(none_of("\"").repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::String);

    // Single-character punctuation (note that `\n` is a token), plus the
    // multi-character `->`, `...`, `#{`, and `${`.
    let punctuation = one_of(",=[]{}()>;\n")
        .to_slice()
        .or(just("->"))
        .or(just("..."))
        .or(just("#{"))
        .or(just("${"))
        .map(Token::Punctuation);

    let token = number
        .or(reserved_or_word)
        .or(keyword)
        .or(string)
        .or(punctuation);

    // Comments run from `&` to the end of the line.
    let comment = just('&')
        .ignore_then(any().and_is(just('\n').not()).repeated())
        .repeated();

    // Whitespace is only spaces and tabs; newlines are significant and are
    // lexed as punctuation above.
    let ludus_ws = just(' ').or(just('\t')).repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(ludus_ws)
        .padded_by(comment)
        // On an unexpected character, skip it and retry from the next one.
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}

#[cfg(test)]
mod tests {
    use crate::lexer;
    use crate::Token;
    use chumsky::prelude::*;

    #[test]
    fn it_lexes_positive_ints() {
        let (mytoken, _) = lexer().parse("42").unwrap()[0].clone();
        assert_eq!(mytoken, Token::Number(42.0))
    }

    #[test]
    fn it_lexes_negative_ints() {
        let (mytoken, _) = lexer().parse("-42").unwrap()[0].clone();
        assert_eq!(mytoken, Token::Number(-42.0))
    }

    #[test]
    fn it_lexes_positive_floats() {
        let (mytoken, _) = lexer().parse("42.032").unwrap()[0].clone();
        assert_eq!(mytoken, Token::Number(42.032))
    }

    #[test]
    fn it_lexes_positive_decimals() {
        let (mytoken, _) = lexer().parse("0.123").unwrap()[0].clone();
        assert_eq!(mytoken, Token::Number(0.123))
    }

    #[test]
    fn it_lexes_negative_floats() {
        let mytoken = lexer().parse("-42.123").unwrap()[0].clone().0;
        assert_eq!(mytoken, Token::Number(-42.123))
    }

    #[test]
    fn it_lexes_negative_decimals() {
        let mytoken = lexer().parse("-0.123").unwrap()[0].clone().0;
        assert_eq!(mytoken, Token::Number(-0.123))
    }

    #[test]
    fn it_lexes_bools() {
        let tt = lexer().parse("true").unwrap()[0].clone().0;
        assert_eq!(tt, Token::Boolean(true));
        let ff = lexer().parse("false").unwrap()[0].clone().0;
        assert_eq!(ff, Token::Boolean(false))
    }

    #[test]
    fn it_lexes_words() {
        let mytoken = lexer().parse("foo").unwrap()[0].clone().0;
        assert_eq!(mytoken, Token::Word("foo"))
    }
    #[test]
    fn it_lexes_keywords() {
        let kw = lexer().parse(":foo").unwrap()[0].clone().0;
        assert_eq!(kw, Token::Keyword("foo"))
    }

    #[test]
    fn it_lexes_strings() {
        let s = lexer().parse("\"foo bar baz\"").unwrap()[0].clone().0;
        assert_eq!(s, Token::String("foo bar baz"))
    }

    #[test]
    fn it_ignores_comments() {
        // `&bar` is skipped, but the newline survives as punctuation at [1],
        // which is why `baz` lands at index 2.
        let e = lexer().parse("foo &bar\nbaz").unwrap();
        assert_eq!(e[0].0, Token::Word("foo"));
        assert_eq!(e[2].0, Token::Word("baz"))
    }

    #[test]
    fn it_lexes_multiple_tokens() {
        let toks = lexer().parse("foo;bar\nbaz").unwrap();
        assert_eq!(toks[0].0, Token::Word("foo"));
        assert_eq!(toks[2].0, Token::Word("bar"));
        assert_eq!(toks[4].0, Token::Word("baz"))
    }

    #[test]
    fn it_lexes_collections() {
        let toks = lexer().parse("(1, 2)").unwrap();
        assert_eq!(toks[0].0, Token::Punctuation("("));
        assert_eq!(toks[1].0, Token::Number(1.0));
        assert_eq!(toks[2].0, Token::Punctuation(","));
        assert_eq!(toks[3].0, Token::Number(2.0));
        assert_eq!(toks[4].0, Token::Punctuation(")"))
    }
}
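
// A minimal end-to-end sketch of driving the lexer on a multi-token program,
// assuming chumsky 1.x's `ParseResult::into_result` to split the token vector
// from any `Rich` errors. The module name `usage_sketch` and the sample Ludus
// source string are illustrative, not fixtures from the language's own suite.
#[cfg(test)]
mod usage_sketch {
    use crate::lexer;
    use crate::Token;
    use chumsky::prelude::*;

    #[test]
    fn it_lexes_a_small_program() {
        let src = "let x = -1.5 & a line comment\nif x then :ok else nil";
        let tokens = lexer()
            .parse(src)
            .into_result()
            .expect("sample source should lex cleanly");
        // Each token is paired with the span of its slice of `src`.
        for (token, span) in &tokens {
            println!("{} @ {:?}", token, span);
        }
        // The comment text is consumed as padding and never tokenized...
        assert!(tokens.iter().all(|(t, _)| *t != Token::Word("comment")));
        // ...while the newline that ends it survives as punctuation.
        assert!(tokens.iter().any(|(t, _)| *t == Token::Punctuation("\n")));
    }
}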