// use ariadne::{sources, Color, Label, Report, ReportKind}; use chumsky::prelude::*; use std::fmt; pub type Span = SimpleSpan; #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { // atomic types Boolean(bool), Number(f64), String(&'src str), Word(&'src str), Keyword(&'src str), Pkgkeyword(&'src str), Ignored(&'src str), // reserved words As, Box, Do, Else, Fn, If, Import, Let, Loop, Match, Nil, Ns, Panic, Pkg, Recur, Repeat, Test, Then, Use, When, With, // punctuation Arrow, Comma, Equals, Lbrace, Lbracket, Lparen, Newline, Pipeline, Placeholder, Rbrace, Rbracket, Rparen, Semi, Splat, Startdict, Startset, } impl<'src> fmt::Display for Token<'src> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Token::Boolean(b) => write!(f, "{}", b), Token::Number(n) => write!(f, "{}", n), Token::String(s) => write!(f, "{}", s), Token::Word(w) => write!(f, "{}", w), Token::Keyword(k) => write!(f, ":{}", k), Token::Ignored(i) => write!(f, "_{}", i), Token::Pkgkeyword(k) => write!(f, ":{}", k), Token::As => write!(f, "as"), Token::Box => write!(f, "box"), Token::Do => write!(f, "do"), Token::Else => write!(f, "else"), Token::Fn => write!(f, "fn"), Token::If => write!(f, "if"), Token::Import => write!(f, "import"), Token::Let => write!(f, "let"), Token::Loop => write!(f, "loop"), Token::Match => write!(f, "match"), Token::Nil => write!(f, "nil"), Token::Ns => write!(f, "ns"), Token::Panic => write!(f, "panic!"), Token::Pkg => write!(f, "pkg"), Token::Recur => write!(f, "recur"), Token::Repeat => write!(f, "repeat"), Token::Test => write!(f, "test"), Token::Then => write!(f, "then"), Token::Use => write!(f, "use"), Token::When => write!(f, "when"), Token::With => write!(f, "with"), Token::Arrow => write!(f, "->"), Token::Comma => write!(f, ","), Token::Equals => write!(f, "="), Token::Lbrace => write!(f, "{{"), Token::Lbracket => write!(f, "["), Token::Lparen => write!(f, "("), Token::Newline => write!(f, "\\n"), Token::Pipeline => write!(f, ">"), Token::Placeholder => write!(f, "_"), Token::Rbrace => write!(f, "}}"), Token::Rbracket => write!(f, "]"), Token::Rparen => write!(f, ")"), Token::Semi => write!(f, ";"), Token::Splat => write!(f, "..."), Token::Startdict => write!(f, "#{{"), Token::Startset => write!(f, "${{"), } } } pub fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err>> { let string = just('"') .ignore_then(none_of('"').repeated().to_slice()) .then_ignore(just('"')) .map(Token::String); let word = any() .filter(char::is_ascii_lowercase) .then( any() .filter(char::is_ascii_alphanumeric) .or(one_of("*_/!?")), ) .repeated() .to_slice(); let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword); let number = just('-') .or_not() .then(text::int(10).then(just('.').then(text::digits(10)).or_not())) .to_slice() .from_str() .unwrapped() .map(Token::Number); let reserved_or_word = word.map(|word: &str| match word { "as" => Token::As, "box" => Token::Box, "do" => Token::Do, "else" => Token::Else, "false" => Token::Boolean(false), "fn" => Token::Fn, "if" => Token::If, "import" => Token::Import, "let" => Token::Let, "loop" => Token::Loop, "match" => Token::Match, "nil" => Token::Nil, "ns" => Token::Ns, "panic!" => Token::Panic, // won't match until C-style ident -> Ludus word "pkg" => Token::Pkg, "recur" => Token::Recur, "repeat" => Token::Repeat, "test" => Token::Test, "then" => Token::Then, "true" => Token::Boolean(true), "use" => Token::Use, "when" => Token::When, "with" => Token::With, _ => Token::Word(word), }); let arrow = just("->").to(Token::Arrow); let comma = just(',').to(Token::Comma); let semicolon = just(';').to(Token::Semi); let placeholder = just('_').to(Token::Placeholder); let control = arrow.or(comma).or(semicolon).or(placeholder); let comment = just('&') .then(any().and_is(just('\n').not()).repeated()) .padded(); let atom = number.or(string).or(keyword).or(reserved_or_word); atom.or(control) .map_with(|tok, e| (tok, e.span())) .padded_by(comment.repeated()) .padded() } #[cfg(test)] mod tests { use crate::lexer; use crate::Token; use chumsky::{container::Seq, prelude::*}; #[test] fn it_works() { let toks = lexer().parse("42").unwrap(); let (tok, _) = toks[0].clone(); assert_eq!(tok, Token::Number(42.0)); } }