Fix a bunch of things, hack some
This commit is contained in:
parent
5777458f2d
commit
1ca1e4df25
|
@ -1,5 +1,6 @@
|
||||||
(ns ludus.scanner
|
(ns ludus.scanner
|
||||||
(:require [ludus.token :as token]))
|
(:require [ludus.token :as token]
|
||||||
|
[clojure.pprint :as pp]))
|
||||||
|
|
||||||
(def reserved-words
|
(def reserved-words
|
||||||
"List of Ludus reserved words."
|
"List of Ludus reserved words."
|
||||||
|
@ -9,12 +10,13 @@
|
||||||
"else"
|
"else"
|
||||||
"nil"
|
"nil"
|
||||||
"match"
|
"match"
|
||||||
"with"
|
|
||||||
"true"
|
"true"
|
||||||
"false"
|
"false"
|
||||||
|
"loop"
|
||||||
|
"recur"
|
||||||
"as"
|
"as"
|
||||||
"ref"
|
"ref"
|
||||||
"swap"})
|
"mut"})
|
||||||
;; other possibilities
|
;; other possibilities
|
||||||
;; "pattern" -- first class patterns?
|
;; "pattern" -- first class patterns?
|
||||||
|
|
||||||
|
@ -38,7 +40,7 @@
|
||||||
(defn- current-char
|
(defn- current-char
|
||||||
"Gets the current character of the scanner."
|
"Gets the current character of the scanner."
|
||||||
[scanner]
|
[scanner]
|
||||||
(nth (::source scanner) (::current scanner)))
|
(nth (::source scanner) (::current scanner) nil))
|
||||||
|
|
||||||
(defn- advance
|
(defn- advance
|
||||||
"Advances the scanner by a single character."
|
"Advances the scanner by a single character."
|
||||||
|
@ -51,7 +53,6 @@
|
||||||
(current-char (advance scanner)))
|
(current-char (advance scanner)))
|
||||||
|
|
||||||
(defn- current-lexeme
|
(defn- current-lexeme
|
||||||
|
|
||||||
[scanner]
|
[scanner]
|
||||||
(subs (::source scanner) (::start scanner) (::current scanner)))
|
(subs (::source scanner) (::start scanner) (::current scanner)))
|
||||||
|
|
||||||
|
@ -69,12 +70,10 @@
|
||||||
(defn- alpha? [c]
|
(defn- alpha? [c]
|
||||||
(boolean (re-find #"\p{L}" (str c))))
|
(boolean (re-find #"\p{L}" (str c))))
|
||||||
|
|
||||||
;; Note that commas are whitespace in Ludus
|
|
||||||
(defn- whitespace? [c]
|
(defn- whitespace? [c]
|
||||||
(or (= c \space) (= c \tab) (= c \return) (= c \,)))
|
(or (= c \space) (= c \tab)))
|
||||||
|
|
||||||
|
(def terminators #{\: \; \newline \{ \( \[ \$ \# \- \< \& \,})
|
||||||
(def terminators #{\: \; \newline \{ \( \[ \$ \# \- \< \&})
|
|
||||||
|
|
||||||
(defn- terminates? [c]
|
(defn- terminates? [c]
|
||||||
(or (whitespace? c) (contains? terminators c)))
|
(or (whitespace? c) (contains? terminators c)))
|
||||||
|
@ -85,12 +84,12 @@
|
||||||
([scanner token-type literal]
|
([scanner token-type literal]
|
||||||
(update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner)))))
|
(update scanner ::tokens conj (token/token token-type (current-lexeme scanner) literal (::line scanner)))))
|
||||||
|
|
||||||
(defn- add-error [scanner msg])
|
(defn- add-error [scanner msg]
|
||||||
|
(update scanner ::errors conj {:msg msg :line (::line scanner)}))
|
||||||
|
|
||||||
(defn- scan-keyword
|
(defn- scan-keyword
|
||||||
([scanner] (scan-keyword scanner scanner))
|
([scanner] (scan-keyword scanner scanner))
|
||||||
([start current])
|
([start current]))
|
||||||
)
|
|
||||||
|
|
||||||
(defn- add-keyword [scanner]
|
(defn- add-keyword [scanner]
|
||||||
(let [advanced (advance scanner)
|
(let [advanced (advance scanner)
|
||||||
|
@ -99,18 +98,31 @@
|
||||||
(add-error scanner (str "Keywords must start with a letter, e.g. :foo. Got " \: char))
|
(add-error scanner (str "Keywords must start with a letter, e.g. :foo. Got " \: char))
|
||||||
(scan-keyword advanced))))
|
(scan-keyword advanced))))
|
||||||
|
|
||||||
(defn- add-number [scanner])
|
(defn- add-zero-start [scanner])
|
||||||
|
|
||||||
(defn- add-string [scanner])
|
(defn- add-number [scanner]
|
||||||
|
(let [current (current-char scanner)]
|
||||||
|
(if (nonzero-digit? current)
|
||||||
|
(loop [current current]))))
|
||||||
|
|
||||||
(defn- add-comment [scanner])
|
;; I am working here--trying to figure out how to add a string token
|
||||||
|
(defn- add-string
|
||||||
|
([scanner] (add-string scanner "")
|
||||||
|
([scanner string]
|
||||||
|
(let [char (current-char scanner)]))))
|
||||||
|
|
||||||
(defn- add-word [scanner])
|
(defn- add-word [scanner])
|
||||||
|
|
||||||
|
(defn- skip-comment [scanner]
|
||||||
|
(if (= \newline (current-char scanner))
|
||||||
|
(advance scanner)
|
||||||
|
(recur (advance scanner))))
|
||||||
|
|
||||||
(defn- scan-token [scanner]
|
(defn- scan-token [scanner]
|
||||||
(let [char (current-char scanner)
|
(let [char (current-char scanner)
|
||||||
scanner (advance scanner)
|
scanner (advance scanner)
|
||||||
next (next-char scanner)]
|
next (current-char scanner)
|
||||||
|
]
|
||||||
(case char
|
(case char
|
||||||
;; one-character tokens
|
;; one-character tokens
|
||||||
\( (add-token scanner ::token/lparen)
|
\( (add-token scanner ::token/lparen)
|
||||||
|
@ -120,31 +132,44 @@
|
||||||
\[ (add-token scanner ::token/lbracket)
|
\[ (add-token scanner ::token/lbracket)
|
||||||
\] (add-token scanner ::token/rbracket)
|
\] (add-token scanner ::token/rbracket)
|
||||||
\; (add-token scanner ::token/semicolon)
|
\; (add-token scanner ::token/semicolon)
|
||||||
\_ (add-token scanner ::token/placeholder)
|
\, (add-token scanner ::token/comma)
|
||||||
\newline (add-token scanner ::token/newline)
|
\newline (add-token scanner ::token/newline)
|
||||||
|
|
||||||
;; two-character tokens
|
;; two-character tokens
|
||||||
;; ->
|
;; ->
|
||||||
\- (if (= next \>)
|
\- (if (= next \>)
|
||||||
(add-token (advance (advance scanner)) ::token/rarrow)
|
(add-token (advance scanner) ::token/rarrow)
|
||||||
(add-error scanner (str "Expected ->. Got " char next)))
|
(add-error scanner (str "Expected ->. Got " char next)))
|
||||||
;; <-
|
;; <-
|
||||||
\< (if (= next \-)
|
\< (if (= next \-)
|
||||||
(add-token (advance (advance scanner)) ::token/larrow)
|
(add-token (advance scanner) ::token/larrow)
|
||||||
(add-error scanner (str "Expected <-. Got " char next)))
|
(add-error scanner (str "Expected <-. Got " char next)))
|
||||||
|
|
||||||
;; begin hashmap #{
|
;; |>
|
||||||
|
\| (if (= next \>)
|
||||||
|
(add-token (advance scanner) ::token/pipeline)
|
||||||
|
(add-error scanner (str "Expected |>. Got " char next)))
|
||||||
|
|
||||||
|
;; possible additional operator: => (bind)
|
||||||
|
|
||||||
|
;; hashmap #{
|
||||||
\# (if (= next \{)
|
\# (if (= next \{)
|
||||||
(add-token (advance (advance scanner)) ::token/starthash)
|
(add-token (advance scanner) ::token/starthash)
|
||||||
(add-error scanner (str "Expected beginning of hash: #{. Got " char next)))
|
(add-error scanner (str "Expected beginning of hash: #{. Got " char next)))
|
||||||
;; begin set ${
|
|
||||||
|
;; set ${
|
||||||
\$ (if (= next \{)
|
\$ (if (= next \{)
|
||||||
(add-token (advance (advance scanner)) ::token/startset)
|
(add-token (advance scanner) ::token/startset)
|
||||||
(add-error scanner (str "Expected beginning of set: ${. Got " char next)))
|
(add-error scanner (str "Expected beginning of set: ${. Got " char next)))
|
||||||
|
|
||||||
|
;; placeholder
|
||||||
|
\_ (if (terminates? next)
|
||||||
|
(add-token scanner ::token/placeholder)
|
||||||
|
(add-word scanner))
|
||||||
|
|
||||||
;; comments
|
;; comments
|
||||||
;; &
|
;; &
|
||||||
\& (add-comment scanner)
|
\& (skip-comment scanner)
|
||||||
|
|
||||||
;; keywords
|
;; keywords
|
||||||
\: (add-keyword scanner)
|
\: (add-keyword scanner)
|
||||||
|
@ -153,10 +178,11 @@
|
||||||
\" (add-string scanner)
|
\" (add-string scanner)
|
||||||
|
|
||||||
;; word matches
|
;; word matches
|
||||||
(comment (cond
|
(cond
|
||||||
(digit? char) (add-number scanner)
|
(whitespace? char) scanner
|
||||||
(alpha? char) (add-word scanner)
|
;; (digit? char) (add-number scanner)
|
||||||
:else (add-error scanner (str "Unexpected character: " char)))))))
|
;; (alpha? char) (add-word scanner)
|
||||||
|
:else (add-error scanner (str "Unexpected character: " char))))))
|
||||||
|
|
||||||
(defn- next-token [scanner]
|
(defn- next-token [scanner]
|
||||||
(assoc scanner ::start (::current scanner)))
|
(assoc scanner ::start (::current scanner)))
|
||||||
|
@ -165,5 +191,11 @@
|
||||||
(loop [scanner (new-scanner source)]
|
(loop [scanner (new-scanner source)]
|
||||||
(if (at-end? scanner)
|
(if (at-end? scanner)
|
||||||
(let [scanner (add-token scanner ::eof)]
|
(let [scanner (add-token scanner ::eof)]
|
||||||
[(::tokens scanner) (::errors scanner)])
|
{:tokens (::tokens scanner)
|
||||||
|
:errors (::errors scanner)})
|
||||||
(recur (-> scanner (scan-token) (next-token))))))
|
(recur (-> scanner (scan-token) (next-token))))))
|
||||||
|
|
||||||
|
|
||||||
|
(let [source "|)"]
|
||||||
|
(scan source))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user