Atoms keep tokens. Add panic mode, begin better errors.

This commit is contained in:
Scott Richmond 2022-02-20 18:40:16 -05:00
parent 2751b7428f
commit 3fde2cb52b

View File

@ -7,7 +7,7 @@
;; a parser map and some functions to work with them ;; a parser map and some functions to work with them
(defn- parser [tokens] (defn- parser [tokens]
{::tokens tokens ::token 0 ::ast {}}) {::tokens tokens ::token 0 ::ast {} ::errors []})
(defn- current [parser] (defn- current [parser]
(nth (::tokens parser) (::token parser) nil)) (nth (::tokens parser) (::token parser) nil))
@ -29,13 +29,43 @@
(declare parse-expr) (declare parse-expr)
(declare parse-word) (declare parse-word)
;; handle some errors
;; Record a parse error on the parser.
;; Builds a poison node carrying `message`, `origin`, and `end`, installs it as
;; the parser's current ::ast, and appends the same node to the ::errors vector.
;; Prints the current token as a debugging aid. Returns the updated parser.
(defn- sync [parser message origin end]
  (println "Synching on " (current parser))
  (let [poison-node {::ast/type ::ast/poison
                     :message message
                     :origin origin
                     :end end}
        with-poison (assoc parser ::ast poison-node)]
    (update with-poison ::errors conj poison-node)))
;; True when the parser's current ::ast is a poison node.
;; Takes a parser map (it looks under the ::ast key first); a value with no
;; ::ast key, such as a bare AST node, yields false.
(defn- poisoned? [parser]
  (-> parser
      ::ast
      ::ast/type
      (= ::ast/poison)))
;; Panic-mode error recovery.
;; Remembers the token where the error was detected (`origin`), then advances
;; the parser until the current token is eof or its type is a member of
;; `sync-on` (a set of token types). At that point it hands off to `sync`,
;; which records a poison node with `message`, `origin`, and the stopping token.
;; Returns the parser produced by `sync`.
(defn- panic [parser message sync-on]
  (println "PANIC!!! in the parser")
  (let [origin (current parser)]
    (loop [parser parser]
      (let [curr (current parser)
            ;; named token-type so clojure.core/type is not shadowed
            token-type (::token/type curr)]
        ;; `current` yields nil once the index is past the end of the token
        ;; vector. Treat that like eof so a stream lacking a trailing eof
        ;; token cannot make this loop advance forever.
        (if (or (nil? curr)
                (= ::token/eof token-type)
                (contains? sync-on token-type))
          (sync parser message origin curr)
          (recur (advance parser)))))))
;; various parsing functions ;; various parsing functions
(defn- parse-atom [parser token] (defn- parse-atom [parser]
(let [token (current parser)]
(-> parser (-> parser
(advance) (advance)
(assoc ::ast { (assoc ::ast {
::ast/type ::ast/atom ::ast/type ::ast/atom
:value (::token/literal token)}))) :token token
:value (::token/literal token)}))))
;; just a quick and dirty map to associate atomic words with values ;; just a quick and dirty map to associate atomic words with values
(def atomic-words { (def atomic-words {
@ -43,12 +73,14 @@
::token/true true ::token/true true
::token/false false}) ::token/false false})
(defn parse-atomic-word [parser token] (defn parse-atomic-word [parser]
(let [token (current parser)]
(-> parser (-> parser
(advance) (advance)
(assoc ::ast { (assoc ::ast {
::ast/type ::ast/atom ::ast/type ::ast/atom
:value (get atomic-words (::token/type token))}))) :token token
:value (get atomic-words (::token/type token))}))))
(defn- add-member [members member] (defn- add-member [members member]
@ -70,7 +102,11 @@
(::token/comma ::token/newline) (recur (advance parser) (add-member members current_member) nil) (::token/comma ::token/newline) (recur (advance parser) (add-member members current_member) nil)
(let [parsed (parse-expr parser)] (let [parsed (parse-expr parser)]
(recur parsed members (::ast parsed))) (if (= ::ast/poison (get-in parsed [::ast ::ast/type]))
(panic parsed (:message (::ast parsed)) #{::token/rparen})
(recur parsed members (::ast parsed))
)
)
)))) ))))
@ -134,6 +170,12 @@
(loop [parser parser (loop [parser parser
exprs [] exprs []
current_expr nil] current_expr nil]
(comment (println "*** Parsing script")
(print "Exprs: ")
(pp/pprint exprs)
(print "Current expr: ")
(pp/pprint current_expr)
(println "Current token type " (::token/type (current parser))))
(case (::token/type (current parser)) (case (::token/type (current parser))
::token/eof (assoc parser ::ast ::token/eof (assoc parser ::ast
{::ast/type ::ast/script :exprs (add-member exprs current_expr)}) {::ast/type ::ast/script :exprs (add-member exprs current_expr)})
@ -142,11 +184,14 @@
(recur (advance parser) (add-member exprs current_expr) nil) (recur (advance parser) (add-member exprs current_expr) nil)
(if current_expr (if current_expr
(-> parser (if (poisoned? current_expr)
(advance) (panic parser (:message current_expr) #{::token/newline ::token/semicolon})
(assoc ::ast {::ast/type ::ast/poison :message "Expected end of expression"})) (let [synced (panic parser "Expected end of expression" #{::token/newline ::token/semicolon})]
(recur synced exprs (::ast synced))
))
(let [parsed (parse-expr parser)] (let [parsed (parse-expr parser)]
(recur parsed exprs (::ast parsed)))) (recur parsed exprs (::ast parsed))
))
))) )))
@ -157,7 +202,7 @@
type (::token/type curr)] type (::token/type curr)]
(case type (case type
::token/keyword ::token/keyword
(recur (advance parser) (conj terms (::ast (parse-atom parser curr)))) (recur (advance parser) (conj terms (::ast (parse-atom parser))))
::token/word ::token/word
(recur (advance parser) (conj terms (::ast (parse-word parser)))) (recur (advance parser) (conj terms (::ast (parse-word parser))))
@ -183,7 +228,7 @@
(case type (case type
::token/word (parse-word parser) ::token/word (parse-word parser)
(::token/number ::token/string ::token/keyword) (parse-atom parser curr) (::token/number ::token/string ::token/keyword) (parse-atom parser)
(-> parser (-> parser
(advance) (advance)
@ -248,18 +293,27 @@
})) }))
)) ))
;; Token types passed to `panic` as the synchronization set when an
;; expression fails to parse: statement separators, the comma delimiter, and
;; the closing enclosures.
(def expr-sync
  #{::token/newline ::token/semicolon ::token/comma
    ::token/rparen ::token/rbracket ::token/rbrace})
(defn- parse-expr [parser] (defn- parse-expr [parser]
(let [token (current parser)] (let [token (current parser)]
(case (::token/type token) (case (::token/type token)
(::token/number ::token/string) (::token/number ::token/string)
(parse-atom parser token) (parse-atom parser)
::token/keyword (let [next (peek parser) ::token/keyword (let [next (peek parser)
type (::token/type next)] type (::token/type next)]
(if (= type ::token/lparen) (if (= type ::token/lparen)
(parse-synthetic parser) (parse-synthetic parser)
(parse-atom parser token))) (parse-atom parser)))
::token/word (let [next (peek parser) ::token/word (let [next (peek parser)
type (::token/type next)] type (::token/type next)]
@ -268,7 +322,7 @@
(parse-word parser))) (parse-word parser)))
(::token/nil ::token/true ::token/false) (::token/nil ::token/true ::token/false)
(parse-atomic-word parser token) (parse-atomic-word parser)
::token/lparen (parse-tuple parser) ::token/lparen (parse-tuple parser)
@ -282,40 +336,52 @@
::token/if (parse-if parser) ::token/if (parse-if parser)
(-> parser ::token/error (panic parser (:message token)
(advance) #{
(assoc ::ast {::ast/type ::ast/poison :message "Expected expression"})) ::token/newline
::token/semicolon
::token/comma
::token/rparen
::token/rbracket
::token/rbrace
})
(::token/rparen ::token/rbrace ::token/rbracket)
(panic parser (str "Unbalanced enclosure: " (::token/lexeme token)) expr-sync)
(::token/semicolon ::token/comma)
(panic parser (str "Unexpected delimiter: " (::token/lexeme token)) expr-sync)
(panic parser "Expected expression" expr-sync)
))) )))
(do (do
(def source "if let foo = :foo (def pp pp/pprint)
then { (def source "(foo, bar, baz^, } )
bar (baz) :quux :foo (bar)
}
else [
(42)
12
:twenty-three
foo (bar) (baz) :quux
(false, nil, ())
]")
(def tokens (:tokens (scanner/scan source)))
[1, 2, 3]")
(def lexed (scanner/scan source))
(def tokens (:tokens lexed))
(def p (parser tokens)) (def p (parser tokens))
(-> (parse-script p) (-> p
(parse-script)
(::ast) (::ast)
(pp/pprint))) (pp)
)
)
(comment " (comment "
Further thoughts/still to do: Further thoughts/still to do:
* Clean up the parsing functions: * Clean up the parsing functions:
- use accept-many in blocks and scripts - use accept-many in blocks and scripts
- parse-atom (and other parse functions) should take only a parser - ast nodes should include their tokens (this is added for atoms, which may be fully sufficient)
- ast nodes should include their tokens
* Time to start working on parsing errors (poisoned nodes, panic mode, etc.) * Time to start working on parsing errors (poisoned nodes, panic mode, etc.)
- this works (ish) for expr, script, tuple
- add to everything else
- investigate duplicated/missing error messages
Other quick thoughts: Other quick thoughts:
* Once I get this far, then it's time to wire up the interpreter (with hard-coded functions, and the beginning of static analysis) * Once I get this far, then it's time to wire up the interpreter (with hard-coded functions, and the beginning of static analysis)