diff --git a/src/ludus/scanner.clj b/src/ludus/scanner.clj
index d5cc7a0..9e00e5e 100644
--- a/src/ludus/scanner.clj
+++ b/src/ludus/scanner.clj
@@ -14,12 +14,14 @@
     "false"
     "as"
     "ref"
-    "swap"
+    "swap"})
     ;; other possibilities
     ;; "pattern" -- first class patterns?
-    })
+
 
-(defn- new-scanner [source]
+(defn- new-scanner
+  "Creates a new scanner."
+  [source]
   {::source source
    ::length (count source)
    ::errors []
@@ -28,16 +30,34 @@
    ::line 1
    ::tokens []})
 
-(defn- at-end? [scanner]
+(defn- at-end?
+  "Tests if a scanner is at end of input."
+  [scanner]
   (>= (::current scanner) (::length scanner)))
 
-(defn- current-char [scanner]
+(defn- current-char
+  "Gets the current character of the scanner."
+  [scanner]
   (nth (::source scanner) (::current scanner)))
 
-(defn- advance [scanner]
+(defn- advance
+  "Advances the scanner by a single character."
+  [scanner]
   (update scanner ::current inc))
 
-(defn- current-lexeme [scanner]
+(defn- next-char
+  "Gets the character after the current one, or nil when the current
+  character is the last one in the source."
+  [scanner]
+  (let [advanced (advance scanner)]
+    ;; current-char uses nth without a default, so guard the lookahead
+    ;; instead of indexing past the end of the source.
+    (when-not (at-end? advanced)
+      (current-char advanced))))
+
+(defn- current-lexeme
+  "Gets the slice of the source between ::start and ::current."
+  [scanner]
   (subs (::source scanner) (::start scanner) (::current scanner)))
 
 (defn- char-in-range? [start end char]
@@ -58,8 +78,11 @@
 (defn- whitespace? [c]
   (or (= c \space) (= c \tab) (= c \return) (= c \,)))
 
+
+(def terminators #{\: \; \newline \{ \( \[ \$ \# \- \< \&})
+
 (defn- terminates? [c]
-  (or (whitespace? c) (= c \:) (= c \newline)))
+  (or (whitespace? c) (contains? terminators c)))
 
 (defn- add-token
   ([scanner token-type]
@@ -67,13 +90,36 @@
   ([scanner token-type literal]
    (update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner)))))
 
+(defn- add-error [scanner msg])
+
+(defn- scan-keyword
+  ([scanner] (scan-keyword scanner scanner))
+  ([start current])
+  )
+
+(defn- add-keyword
+  "Validates the start of a keyword. Expects a scanner that has already
+  been advanced past the leading colon, which is how scan-token calls it."
+  [scanner]
+  (let [char (when-not (at-end? scanner) (current-char scanner))]
+    (if (and char (alpha? char))
+      (scan-keyword scanner)
+      (add-error scanner (str "Keywords must start with a letter, e.g. :foo. \nGot " \: char)))))
+
 (defn- add-number [scanner])
 
 (defn- add-string [scanner])
 
+(defn- add-comment [scanner])
+
+(defn- add-word [scanner])
+
 (defn- scan-token [scanner]
   (let [char (current-char scanner)
+        ;; Look ahead before advancing: next-char is relative to the
+        ;; scanner it is given, so it must see the un-advanced scanner.
+        next (next-char scanner)
         scanner (advance scanner)]
     (case char
       ;; one-character tokens
       \( (add-token scanner ::token/lparen)
@@ -83,25 +129,43 @@
       \[ (add-token scanner ::token/lbracket)
       \] (add-token scanner ::token/rbracket)
       \; (add-token scanner ::token/semicolon)
+      \_ (add-token scanner ::token/placeholder)
       \newline (add-token scanner ::token/newline)
+
       ;; two-character tokens
       ;; ->
+      ;; (char is already consumed; one more advance takes the second character)
+      \- (if (= next \>)
+           (add-token (advance scanner) ::token/rarrow)
+           (add-error scanner (str "Expected ->. Got " char next)))
       ;; <-
-      ;; //
+      \< (if (= next \-)
+           (add-token (advance scanner) ::token/larrow)
+           (add-error scanner (str "Expected <-. Got " char next)))
+
       ;; begin hashmap #{
+      \# (if (= next \{)
+           (add-token (advance scanner) ::token/starthash)
+           (add-error scanner (str "Expected beginning of hash: #{. Got " char next)))
       ;; begin set ${
+      \$ (if (= next \{)
+           (add-token (advance scanner) ::token/startset)
+           (add-error scanner (str "Expected beginning of set: ${. Got " char next)))
+
+      ;; comments
+      ;; &
+      \& (add-comment scanner)
 
       ;; keywords
-      ;;\: (add-keyword scanner)
+      \: (add-keyword scanner)
 
       ;; strings
-      ;;\"
+      \" (add-string scanner)
 
       ;; word matches
       (comment
        (cond (digit? char) (add-number scanner)
              (alpha? char) (add-word scanner)
-             (= \_ char) (add-placeholder scanner)
              :else (add-error scanner (str "Unexpected character: " char)))))))
 
 (defn- next-token [scanner]