Atoms keep tokens. Add panic mode, begin better errors.
This commit is contained in:
parent
2751b7428f
commit
3fde2cb52b
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
;; a parser map and some functions to work with them
|
;; a parser map and some functions to work with them
|
||||||
(defn- parser [tokens]
|
(defn- parser [tokens]
|
||||||
{::tokens tokens ::token 0 ::ast {}})
|
{::tokens tokens ::token 0 ::ast {} ::errors []})
|
||||||
|
|
||||||
(defn- current [parser]
|
(defn- current [parser]
|
||||||
(nth (::tokens parser) (::token parser) nil))
|
(nth (::tokens parser) (::token parser) nil))
|
||||||
|
@ -29,13 +29,43 @@
|
||||||
(declare parse-expr)
|
(declare parse-expr)
|
||||||
(declare parse-word)
|
(declare parse-word)
|
||||||
|
|
||||||
|
;; handle some errors
|
||||||
|
(defn- sync [parser message origin end]
|
||||||
|
(println "Synching on " (current parser))
|
||||||
|
(let [poison {
|
||||||
|
::ast/type ::ast/poison
|
||||||
|
:message message
|
||||||
|
:origin origin
|
||||||
|
:end end
|
||||||
|
}]
|
||||||
|
(-> parser
|
||||||
|
(assoc ::ast poison)
|
||||||
|
(update ::errors conj poison))))
|
||||||
|
|
||||||
|
(defn- poisoned? [parser]
|
||||||
|
(= ::ast/poison (get-in parser [::ast ::ast/type])))
|
||||||
|
|
||||||
|
(defn- panic [parser message sync-on]
|
||||||
|
(println "PANIC!!! in the parser")
|
||||||
|
(let [origin (current parser)]
|
||||||
|
(loop [parser parser]
|
||||||
|
(let [
|
||||||
|
curr (current parser)
|
||||||
|
type (::token/type curr)
|
||||||
|
]
|
||||||
|
(if (or (= ::token/eof type) (contains? sync-on type))
|
||||||
|
(sync parser message origin curr)
|
||||||
|
(recur (advance parser)))))))
|
||||||
|
|
||||||
;; various parsing functions
|
;; various parsing functions
|
||||||
(defn- parse-atom [parser token]
|
(defn- parse-atom [parser]
|
||||||
|
(let [token (current parser)]
|
||||||
(-> parser
|
(-> parser
|
||||||
(advance)
|
(advance)
|
||||||
(assoc ::ast {
|
(assoc ::ast {
|
||||||
::ast/type ::ast/atom
|
::ast/type ::ast/atom
|
||||||
:value (::token/literal token)})))
|
:token token
|
||||||
|
:value (::token/literal token)}))))
|
||||||
|
|
||||||
;; just a quick and dirty map to associate atomic words with values
|
;; just a quick and dirty map to associate atomic words with values
|
||||||
(def atomic-words {
|
(def atomic-words {
|
||||||
|
@ -43,12 +73,14 @@
|
||||||
::token/true true
|
::token/true true
|
||||||
::token/false false})
|
::token/false false})
|
||||||
|
|
||||||
(defn parse-atomic-word [parser token]
|
(defn parse-atomic-word [parser]
|
||||||
|
(let [token (current parser)]
|
||||||
(-> parser
|
(-> parser
|
||||||
(advance)
|
(advance)
|
||||||
(assoc ::ast {
|
(assoc ::ast {
|
||||||
::ast/type ::ast/atom
|
::ast/type ::ast/atom
|
||||||
:value (get atomic-words (::token/type token))})))
|
:token token
|
||||||
|
:value (get atomic-words (::token/type token))}))))
|
||||||
|
|
||||||
|
|
||||||
(defn- add-member [members member]
|
(defn- add-member [members member]
|
||||||
|
@ -70,7 +102,11 @@
|
||||||
(::token/comma ::token/newline) (recur (advance parser) (add-member members current_member) nil)
|
(::token/comma ::token/newline) (recur (advance parser) (add-member members current_member) nil)
|
||||||
|
|
||||||
(let [parsed (parse-expr parser)]
|
(let [parsed (parse-expr parser)]
|
||||||
(recur parsed members (::ast parsed)))
|
(if (= ::ast/poison (get-in parsed [::ast ::ast/type]))
|
||||||
|
(panic parsed (:message (::ast parsed)) #{::token/rparen})
|
||||||
|
(recur parsed members (::ast parsed))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
))))
|
))))
|
||||||
|
|
||||||
|
@ -134,6 +170,12 @@
|
||||||
(loop [parser parser
|
(loop [parser parser
|
||||||
exprs []
|
exprs []
|
||||||
current_expr nil]
|
current_expr nil]
|
||||||
|
(comment (println "*** Parsing script")
|
||||||
|
(print "Exprs: ")
|
||||||
|
(pp/pprint exprs)
|
||||||
|
(print "Current expr: ")
|
||||||
|
(pp/pprint current_expr)
|
||||||
|
(println "Current token type " (::token/type (current parser))))
|
||||||
(case (::token/type (current parser))
|
(case (::token/type (current parser))
|
||||||
::token/eof (assoc parser ::ast
|
::token/eof (assoc parser ::ast
|
||||||
{::ast/type ::ast/script :exprs (add-member exprs current_expr)})
|
{::ast/type ::ast/script :exprs (add-member exprs current_expr)})
|
||||||
|
@ -142,11 +184,14 @@
|
||||||
(recur (advance parser) (add-member exprs current_expr) nil)
|
(recur (advance parser) (add-member exprs current_expr) nil)
|
||||||
|
|
||||||
(if current_expr
|
(if current_expr
|
||||||
(-> parser
|
(if (poisoned? current_expr)
|
||||||
(advance)
|
(panic parser (:message current_expr) #{::token/newline ::token/semicolon})
|
||||||
(assoc ::ast {::ast/type ::ast/poison :message "Expected end of expression"}))
|
(let [synced (panic parser "Expected end of expression" #{::token/newline ::token/semicolon})]
|
||||||
|
(recur synced exprs (::ast synced))
|
||||||
|
))
|
||||||
(let [parsed (parse-expr parser)]
|
(let [parsed (parse-expr parser)]
|
||||||
(recur parsed exprs (::ast parsed))))
|
(recur parsed exprs (::ast parsed))
|
||||||
|
))
|
||||||
|
|
||||||
)))
|
)))
|
||||||
|
|
||||||
|
@ -157,7 +202,7 @@
|
||||||
type (::token/type curr)]
|
type (::token/type curr)]
|
||||||
(case type
|
(case type
|
||||||
::token/keyword
|
::token/keyword
|
||||||
(recur (advance parser) (conj terms (::ast (parse-atom parser curr))))
|
(recur (advance parser) (conj terms (::ast (parse-atom parser))))
|
||||||
|
|
||||||
::token/word
|
::token/word
|
||||||
(recur (advance parser) (conj terms (::ast (parse-word parser))))
|
(recur (advance parser) (conj terms (::ast (parse-word parser))))
|
||||||
|
@ -183,7 +228,7 @@
|
||||||
(case type
|
(case type
|
||||||
::token/word (parse-word parser)
|
::token/word (parse-word parser)
|
||||||
|
|
||||||
(::token/number ::token/string ::token/keyword) (parse-atom parser curr)
|
(::token/number ::token/string ::token/keyword) (parse-atom parser)
|
||||||
|
|
||||||
(-> parser
|
(-> parser
|
||||||
(advance)
|
(advance)
|
||||||
|
@ -248,18 +293,27 @@
|
||||||
}))
|
}))
|
||||||
))
|
))
|
||||||
|
|
||||||
|
(def expr-sync #{
|
||||||
|
::token/newline
|
||||||
|
::token/semicolon
|
||||||
|
::token/comma
|
||||||
|
::token/rparen
|
||||||
|
::token/rbracket
|
||||||
|
::token/rbrace
|
||||||
|
})
|
||||||
|
|
||||||
(defn- parse-expr [parser]
|
(defn- parse-expr [parser]
|
||||||
(let [token (current parser)]
|
(let [token (current parser)]
|
||||||
(case (::token/type token)
|
(case (::token/type token)
|
||||||
|
|
||||||
(::token/number ::token/string)
|
(::token/number ::token/string)
|
||||||
(parse-atom parser token)
|
(parse-atom parser)
|
||||||
|
|
||||||
::token/keyword (let [next (peek parser)
|
::token/keyword (let [next (peek parser)
|
||||||
type (::token/type next)]
|
type (::token/type next)]
|
||||||
(if (= type ::token/lparen)
|
(if (= type ::token/lparen)
|
||||||
(parse-synthetic parser)
|
(parse-synthetic parser)
|
||||||
(parse-atom parser token)))
|
(parse-atom parser)))
|
||||||
|
|
||||||
::token/word (let [next (peek parser)
|
::token/word (let [next (peek parser)
|
||||||
type (::token/type next)]
|
type (::token/type next)]
|
||||||
|
@ -268,7 +322,7 @@
|
||||||
(parse-word parser)))
|
(parse-word parser)))
|
||||||
|
|
||||||
(::token/nil ::token/true ::token/false)
|
(::token/nil ::token/true ::token/false)
|
||||||
(parse-atomic-word parser token)
|
(parse-atomic-word parser)
|
||||||
|
|
||||||
::token/lparen (parse-tuple parser)
|
::token/lparen (parse-tuple parser)
|
||||||
|
|
||||||
|
@ -282,40 +336,52 @@
|
||||||
|
|
||||||
::token/if (parse-if parser)
|
::token/if (parse-if parser)
|
||||||
|
|
||||||
(-> parser
|
::token/error (panic parser (:message token)
|
||||||
(advance)
|
#{
|
||||||
(assoc ::ast {::ast/type ::ast/poison :message "Expected expression"}))
|
::token/newline
|
||||||
|
::token/semicolon
|
||||||
|
::token/comma
|
||||||
|
::token/rparen
|
||||||
|
::token/rbracket
|
||||||
|
::token/rbrace
|
||||||
|
})
|
||||||
|
|
||||||
|
(::token/rparen ::token/rbrace ::token/rbracket)
|
||||||
|
(panic parser (str "Unbalanced enclosure: " (::token/lexeme token)) expr-sync)
|
||||||
|
|
||||||
|
(::token/semicolon ::token/comma)
|
||||||
|
(panic parser (str "Unexpected delimiter: " (::token/lexeme token)) expr-sync)
|
||||||
|
|
||||||
|
(panic parser "Expected expression" expr-sync)
|
||||||
|
|
||||||
)))
|
)))
|
||||||
|
|
||||||
(do
|
(do
|
||||||
(def source "if let foo = :foo
|
(def pp pp/pprint)
|
||||||
then {
|
(def source "(foo, bar, baz^, } )
|
||||||
bar (baz) :quux
|
:foo (bar)
|
||||||
}
|
|
||||||
else [
|
|
||||||
(42)
|
|
||||||
12
|
|
||||||
:twenty-three
|
|
||||||
foo (bar) (baz) :quux
|
|
||||||
(false, nil, ())
|
|
||||||
]")
|
|
||||||
|
|
||||||
(def tokens (:tokens (scanner/scan source)))
|
|
||||||
|
|
||||||
|
[1, 2, 3]")
|
||||||
|
(def lexed (scanner/scan source))
|
||||||
|
(def tokens (:tokens lexed))
|
||||||
(def p (parser tokens))
|
(def p (parser tokens))
|
||||||
|
|
||||||
(-> (parse-script p)
|
(-> p
|
||||||
|
(parse-script)
|
||||||
(::ast)
|
(::ast)
|
||||||
(pp/pprint)))
|
(pp)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
(comment "
|
(comment "
|
||||||
Further thoughts/still to do:
|
Further thoughts/still to do:
|
||||||
* Clean up the parsing functions:
|
* Clean up the parsing functions:
|
||||||
- use accept-many in blocks and scripts
|
- use accept-many in blocks and scripts
|
||||||
- parse-atom (and other parse functions) should take only a parser
|
- ast nodes should include their tokens (this is added for atoms, which may be fully sufficient)
|
||||||
- ast nodes should include their tokens
|
|
||||||
* Time to start working on parsing errors (poisoned nodes, panic mode, etc.)
|
* Time to start working on parsing errors (poisoned nodes, panic mode, etc.)
|
||||||
|
- this works (ish) for expr, script, tuple
|
||||||
|
- add to everything else
|
||||||
|
- investigate duplicated/missing error messages
|
||||||
|
|
||||||
Other quick thoughts:
|
Other quick thoughts:
|
||||||
* Once I get this far, then it's time to wire up the interpreter (with hard-coded functions, and the beginning of static analysis)
|
* Once I get this far, then it's time to wire up the interpreter (with hard-coded functions, and the beginning of static analysis)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user