Review notes for this revision of the patch
(text ahead of the first "diff --git" line is skipped by git apply and patch).

* Line breaks and indentation were rebuilt from a whitespace-collapsed copy.
  Hunk line counts were used to place line breaks; exact indentation and the
  position of some blank context lines are best guesses.
* current-lexeme: the hunk that deleted its [scanner] parameter vector is
  dropped, because a defn without a parameter vector does not compile. The
  offsets of all later hunks are shifted to match.
* add-string: the one-argument arity is now closed before the two-argument
  arity begins (it previously swallowed it).
* skip-comment: stops at end of input. current-char yields nil there, so the
  old version recurred forever on a final comment with no trailing newline.
* whitespace?: \return stays whitespace so CRLF input is not reported as
  "Unexpected character".
* scan-token: a lone _ at end of input is scanned as a placeholder.
* The scratch (scan "|)") call is wrapped in (comment ...) so it does not run
  on namespace load.
* Still open: add-number calls nonzero-digit?, which this patch does not
  define (confirm it exists in the file); add-zero-start, add-word,
  scan-keyword and the bodies of add-number / add-string are stubs that
  return nil.

diff --git a/src/ludus/scanner.clj b/src/ludus/scanner.clj
index 9e00e5e..03b312b 100644
--- a/src/ludus/scanner.clj
+++ b/src/ludus/scanner.clj
@@ -1,5 +1,6 @@
 (ns ludus.scanner
-  (:require [ludus.token :as token]))
+  (:require [ludus.token :as token]
+            [clojure.pprint :as pp]))
 
 (def reserved-words
   "List of Ludus reserved words."
@@ -9,12 +10,13 @@
    "else"
    "nil"
    "match"
-   "with"
    "true"
    "false"
+   "loop"
+   "recur"
    "as"
    "ref"
-   "swap"})
+   "mut"})
 
 ;; other possibilities
 ;; "pattern" -- first class patterns?
@@ -38,7 +40,7 @@
 (defn- current-char
   "Gets the current character of the scanner."
   [scanner]
-  (nth (::source scanner) (::current scanner)))
+  (nth (::source scanner) (::current scanner) nil))
 
 (defn- advance
   "Advances the scanner by a single character."
@@ -69,12 +71,10 @@
 (defn- alpha? [c]
   (boolean (re-find #"\p{L}" (str c))))
 
-;; Note that commas are whitespace in Ludus
 (defn- whitespace? [c]
-  (or (= c \space) (= c \tab) (= c \return) (= c \,)))
+  (or (= c \space) (= c \tab) (= c \return)))
 
-
-(def terminators #{\: \; \newline \{ \( \[ \$ \# \- \< \&})
+(def terminators #{\: \; \newline \{ \( \[ \$ \# \- \< \& \,})
 
 (defn- terminates? [c]
   (or (whitespace? c) (contains? terminators c)))
@@ -85,32 +85,50 @@
   ([scanner token-type literal]
    (update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner)))))
 
-(defn- add-error [scanner msg])
+(defn- add-error
+  "Appends an error map holding msg and the scanner's current line to the scanner's errors."
+  [scanner msg]
+  (update scanner ::errors conj {:msg msg :line (::line scanner)}))
 
 (defn- scan-keyword
   ([scanner] (scan-keyword scanner scanner))
-  ([start current])
-  )
+  ([start current]))
 
 (defn- add-keyword [scanner]
   (let [advanced (advance scanner)
         char (current-char advanced)]
     (if (not (alpha? char))
       (add-error scanner (str "Keywords must start with a letter, e.g. :foo. Got " \: char))
-        (scan-keyword advanced))))
+      (scan-keyword advanced))))
 
-(defn- add-number [scanner])
+(defn- add-zero-start [scanner])
 
-(defn- add-string [scanner])
+(defn- add-number [scanner]
+  (let [current (current-char scanner)]
+    (if (nonzero-digit? current)
+      (loop [current current]))))
 
-(defn- add-comment [scanner])
+;; I am working here--trying to figure out how to add a string token
+(defn- add-string
+  ([scanner] (add-string scanner ""))
+  ([scanner string]
+   (let [char (current-char scanner)])))
 
 (defn- add-word [scanner])
 
+(defn- skip-comment
+  "Skips the rest of a comment: consumes characters through the next newline.
+  If the input ends first (current-char is nil), returns the scanner as-is."
+  [scanner]
+  (case (current-char scanner)
+    nil scanner
+    \newline (advance scanner)
+    (recur (advance scanner))))
+
 (defn- scan-token [scanner]
   (let [char (current-char scanner)
         scanner (advance scanner)
-        next (next-char scanner)]
+        next (current-char scanner)
+        ]
     (case char
       ;; one-character tokens
       \( (add-token scanner ::token/lparen)
@@ -120,31 +138,44 @@
       \[ (add-token scanner ::token/lbracket)
       \] (add-token scanner ::token/rbracket)
       \; (add-token scanner ::token/semicolon)
-      \_ (add-token scanner ::token/placeholder)
+      \, (add-token scanner ::token/comma)
       \newline (add-token scanner ::token/newline)
 
       ;; two-character tokens
       ;; ->
       \- (if (= next \>)
-           (add-token (advance (advance scanner)) ::token/rarrow)
+           (add-token (advance scanner) ::token/rarrow)
            (add-error scanner (str "Expected ->. Got " char next)))
       ;; <-
       \< (if (= next \-)
-           (add-token (advance (advance scanner)) ::token/larrow)
+           (add-token (advance scanner) ::token/larrow)
            (add-error scanner (str "Expected <-. Got " char next)))
 
-      ;; begin hashmap #{
+      ;; |>
+      \| (if (= next \>)
+           (add-token (advance scanner) ::token/pipeline)
+           (add-error scanner (str "Expected |>. Got " char next)))
+
+      ;; possible additional operator: => (bind)
+
+      ;; hashmap #{
       \# (if (= next \{)
-           (add-token (advance (advance scanner)) ::token/starthash)
+           (add-token (advance scanner) ::token/starthash)
            (add-error scanner (str "Expected beginning of hash: #{. Got " char next)))
-      ;; begin set ${
+
+      ;; set ${
       \$ (if (= next \{)
-           (add-token (advance (advance scanner)) ::token/startset)
+           (add-token (advance scanner) ::token/startset)
            (add-error scanner (str "Expected beginning of set: ${. Got " char next)))
 
+      ;; placeholder (nil next means end of input, which also ends the token)
+      \_ (if (or (nil? next) (terminates? next))
+           (add-token scanner ::token/placeholder)
+           (add-word scanner))
+
       ;; comments
       ;; &
-      \& (add-comment scanner)
+      \& (skip-comment scanner)
 
       ;; keywords
       \: (add-keyword scanner)
@@ -153,10 +184,11 @@
       \" (add-string scanner)
 
       ;; word matches
-      (comment (cond
-                 (digit? char) (add-number scanner)
-                 (alpha? char) (add-word scanner)
-                 :else (add-error scanner (str "Unexpected character: " char)))))))
+      (cond
+        (whitespace? char) scanner
+        ;; (digit? char) (add-number scanner)
+        ;; (alpha? char) (add-word scanner)
+        :else (add-error scanner (str "Unexpected character: " char))))))
 
 (defn- next-token [scanner]
   (assoc scanner ::start (::current scanner)))
@@ -165,5 +197,11 @@
   (loop [scanner (new-scanner source)]
     (if (at-end? scanner)
       (let [scanner (add-token scanner ::eof)]
-        [(::tokens scanner) (::errors scanner)])
-      (recur (-> scanner (scan-token) (next-token))))))
\ No newline at end of file
+        {:tokens (::tokens scanner)
+         :errors (::errors scanner)})
+      (recur (-> scanner (scan-token) (next-token))))))
+
+;; REPL scratch: evaluate by hand; wrapped in comment so it does not run at load.
+(comment
+  (let [source "|)"]
+    (scan source)))