Fiddle with the scanner.

This commit is contained in:
Scott Richmond 2021-12-28 18:24:42 -05:00
parent 425c021fa1
commit 6b00dfe1f5

View File

@ -14,12 +14,14 @@
"false" "false"
"as" "as"
"ref" "ref"
"swap" "swap"})
;; other possibilities ;; other possibilities
;; "pattern" -- first class patterns? ;; "pattern" -- first class patterns?
})
(defn- new-scanner [source] (defn- new-scanner
"Creates a new scanner."
[source]
{::source source {::source source
::length (count source) ::length (count source)
::errors [] ::errors []
@ -28,16 +30,29 @@
::line 1 ::line 1
::tokens []}) ::tokens []})
(defn- at-end?
  "Returns true once the scanner's cursor has reached or passed the
  length of its source, false otherwise."
  [scanner]
  (let [position (::current scanner)
        limit    (::length scanner)]
    (<= limit position)))
(defn- current-char
  "Gets the character at the scanner's current position.

  Returns nil when the position lies outside the source (for example at
  end of input), so callers can peek without checking bounds first."
  [scanner]
  ;; The three-argument form of nth yields the supplied default instead of
  ;; throwing when the index is out of range.
  (nth (::source scanner) (::current scanner) nil))
(defn- advance
  "Returns the scanner with its cursor moved one character forward.
  All other scanner state is left untouched."
  [scanner]
  (let [position (::current scanner)]
    (assoc scanner ::current (inc position))))
(defn- next-char
  "Gets the character one position after the scanner's current one,
  without modifying the scanner.

  Returns nil when there is no such character (the current position is
  the last character of the source, or already past it)."
  [scanner]
  ;; Look one index ahead; the nil default keeps a peek at the end of the
  ;; input from throwing an index-out-of-bounds exception.
  (nth (::source scanner) (inc (::current scanner)) nil))
(defn- current-lexeme
  "Returns the slice of the source running from the scanner's start
  marker (inclusive) up to its cursor (exclusive)."
  [scanner]
  (let [text  (::source scanner)
        from  (::start scanner)
        until (::current scanner)]
    (subs text from until)))
(defn- char-in-range? [start end char] (defn- char-in-range? [start end char]
@ -58,8 +73,11 @@
(defn- whitespace?
  "True when c is a space, tab, carriage return or comma.
  Newline is deliberately not part of this set."
  [c]
  (contains? #{\space \tab \return \,} c))
(def terminators
  "Non-whitespace characters that end a word-like token.

  Closing delimiters are included alongside the opening ones so that a
  token written directly before `)`, `]` or `}` is terminated as well."
  #{\: \; \newline
    \( \) \[ \] \{ \}
    \$ \# \- \< \&})
(defn- terminates?
  "True when c ends the token being scanned: either it is whitespace
  or it is a member of the terminators set."
  [c]
  (if (whitespace? c)
    true
    (contains? terminators c)))
(defn- add-token (defn- add-token
([scanner token-type] ([scanner token-type]
@ -67,13 +85,32 @@
([scanner token-type literal] ([scanner token-type literal]
(update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner))))) (update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner)))))
(defn- add-error
  "Records a scanning error and returns the updated scanner.

  The error is appended to the scanner's ::errors collection as a map
  holding the message and the line the scanner was on. Returning the
  scanner (rather than nil) lets callers keep scanning after an error."
  [scanner msg]
  ;; fnil guards against a scanner map that has no ::errors entry yet.
  (update scanner ::errors (fnil conj [])
          {:message msg
           :line    (::line scanner)}))
(defn- scan-keyword
  "Unfinished stub for scanning a keyword.

  The one-argument arity delegates to the two-argument arity, passing the
  same scanner as both `start` and `current`. The two-argument arity has
  no implementation yet and returns nil."
  ([scanner]
   (scan-keyword scanner scanner))
  ([start current]
   ;; TODO: not implemented yet.
   nil))
(defn- add-keyword
  "Validates the start of a keyword and hands off to scan-keyword.

  scan-token calls this with a scanner that has already been advanced past
  the leading colon, so the scanner's current character is the first
  character of the keyword name. That character must be a letter;
  otherwise (including when the colon is the last character of the input)
  an error is reported through add-error."
  [scanner]
  ;; Inspect the character directly after the colon. Advancing again here
  ;; would skip it and validate the second character instead, and could
  ;; index past the end of the source.
  (let [char (when-not (at-end? scanner)
               (current-char scanner))]
    (if (and char (alpha? char))
      (scan-keyword scanner)
      (add-error scanner (str "Keywords must start with a letter, e.g. :foo. Got " \: char)))))
(defn- add-number
  "Unfinished stub for scanning a number; currently returns nil."
  [scanner]
  ;; TODO: not implemented yet.
  nil)
(defn- add-string
  "Unfinished stub for scanning a string; currently returns nil."
  [scanner]
  ;; TODO: not implemented yet.
  nil)
(defn- add-comment
  "Unfinished stub for scanning a comment; currently returns nil."
  [scanner]
  ;; TODO: not implemented yet.
  nil)
(defn- add-word
  "Unfinished stub for scanning a word; currently returns nil."
  [scanner]
  ;; TODO: not implemented yet.
  nil)
(defn- scan-token [scanner] (defn- scan-token [scanner]
(let [char (current-char scanner) (let [char (current-char scanner)
scanner (advance scanner)] scanner (advance scanner)
next (next-char scanner)]
(case char (case char
;; one-character tokens ;; one-character tokens
\( (add-token scanner ::token/lparen) \( (add-token scanner ::token/lparen)
@ -83,25 +120,42 @@
\[ (add-token scanner ::token/lbracket) \[ (add-token scanner ::token/lbracket)
\] (add-token scanner ::token/rbracket) \] (add-token scanner ::token/rbracket)
\; (add-token scanner ::token/semicolon) \; (add-token scanner ::token/semicolon)
\_ (add-token scanner ::token/placeholder)
\newline (add-token scanner ::token/newline) \newline (add-token scanner ::token/newline)
;; two-character tokens ;; two-character tokens
;; -> ;; ->
\- (if (= next \>)
(add-token (advance (advance scanner)) ::token/rarrow)
(add-error scanner (str "Expected ->. Got " char next)))
;; <- ;; <-
;; // \< (if (= next \-)
(add-token (advance (advance scanner)) ::token/larrow)
(add-error scanner (str "Expected <-. Got " char next)))
;; begin hashmap #{ ;; begin hashmap #{
\# (if (= next \{)
(add-token (advance (advance scanner)) ::token/starthash)
(add-error scanner (str "Expected beginning of hash: #{. Got " char next)))
;; begin set ${ ;; begin set ${
\$ (if (= next \{)
(add-token (advance (advance scanner)) ::token/startset)
(add-error scanner (str "Expected beginning of set: ${. Got " char next)))
;; comments
;; &
\& (add-comment scanner)
;; keywords ;; keywords
;;\: (add-keyword scanner) \: (add-keyword scanner)
;; strings ;; strings
;;\" \" (add-string scanner)
;; word matches ;; word matches
(comment (cond (comment (cond
(digit? char) (add-number scanner) (digit? char) (add-number scanner)
(alpha? char) (add-word scanner) (alpha? char) (add-word scanner)
(= \_ char) (add-placeholder scanner)
:else (add-error scanner (str "Unexpected character: " char))))))) :else (add-error scanner (str "Unexpected character: " char)))))))
(defn- next-token [scanner] (defn- next-token [scanner]