complete draft of parsing

This commit is contained in:
Scott Richmond 2024-05-11 23:25:36 -04:00
parent 806ec0e8f0
commit 2cfe9fdffc

View File

@ -259,13 +259,18 @@
(def sequels [:lparen :keyword]) (def sequels [:lparen :keyword])
(defn- word [parser] (defn- word-expr [parser]
(expect parser :word) (expect parser :word)
(if (has-value? sequels (-> parser peek type)) (break (synthetic parser))) (if (has-value? sequels (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current)) (def curr (-> parser current))
(advance parser) (advance parser)
{:type :word :data (curr :lexeme) :token curr} {:type :word :data (curr :lexeme) :token curr})
)
(defn- word-only
  "Parse exactly one :word token into a word node.
  Unlike `word-expr`, this does not peek at the following token, so it never
  hands off to `synthetic`. `expect` is called first with :word; it presumably
  signals an error when the current token is not a word (its definition is
  outside this view -- confirm). Returns {:type :word :data <lexeme> :token <token>}
  and leaves the parser advanced past the word."
  [parser]
  (expect parser :word)
  (let [tok (current parser)]
    # consume the word only after capturing it, so the node keeps the right token
    (advance parser)
    {:type :word
     :data (tok :lexeme)
     :token tok}))
(defn- args [parser] (defn- args [parser]
(def origin (current parser)) (def origin (current parser))
@ -289,7 +294,7 @@
(set (ast :partial) true) (set (ast :partial) true)
(advance parser) (advance parser)
{:type :placeholder :token origin})) {:type :placeholder :token origin}))
(try (nonbinding parser) ([e] e)))) (capture nonbinding parser)))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (pp e) (array/push (ast :data) e)))) ([e] (pp e) (array/push (ast :data) e))))
@ -297,20 +302,21 @@
ast) ast)
(defn- synth-root [parser] (defn- synth-root [parser]
(print "parsing synth root")
(def origin (current parser)) (def origin (current parser))
(advance parser) (advance parser)
(case (type origin) (case (type origin)
:word {:type :word :data (origin :lexeme) :token origin} :word {:type :word :data (origin :lexeme) :token origin}
:keyword {:type :keyword :data (origin :literal) :token origin} :keyword {:type :keyword :data (origin :literal) :token origin}
:pkg-name {:type :pkg-name :data (origin :lexem) :token origin} :pkg-name {:type :pkg-name :data (origin :lexeme) :token origin}
(panic parser "expected word, keyword, or package") (panic parser "expected word, keyword, or package")
) )
) )
(defrec synthetic [parser] (defrec synthetic [parser]
(print "parsing synthetic")
(def origin (current parser)) (def origin (current parser))
(def ast {:type :synthetic :data @[(synth-root origin)] :token origin}) (def ast {:type :synthetic :data @[(synth-root parser)] :token origin})
(advance parser)
(while (has-value? sequels (-> parser current type)) (while (has-value? sequels (-> parser current type))
(def term (def term
(case (-> parser current type) (case (-> parser current type)
@ -333,7 +339,7 @@
(def err {:type :error :token origin :msg "unclosed paren"}) (def err {:type :error :token origin :msg "unclosed paren"})
(array/push (parser :errors) err) (array/push (parser :errors) err)
(error err)) (error err))
(def term (try (nonbinding parser) ([e] e))) (def term (capture nonbinding parser))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (pp e) (array/push (ast :data) e)))) ([e] (pp e) (array/push (ast :data) e))))
@ -354,10 +360,10 @@
(def term (if (check parser :splat) (def term (if (check parser :splat)
(do (do
(advance parser) (advance parser)
(def splatted (try (word parser) ([e] e))) (def splatted (try (word-only parser) ([e] e)))
{:type :splat :data splatted :token origin} {:type :splat :data splatted :token origin}
) )
(try (nonbinding parser) ([e] e)))) (capture nonbinding parser)))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (array/push (ast :data) e)))) ([e] (array/push (ast :data) e))))
@ -378,10 +384,10 @@
(def term (if (check parser :splat) (def term (if (check parser :splat)
(do (do
(advance parser) (advance parser)
(def splatted (capture word parser)) (def splatted (capture word-only parser))
{:type :splat :data splatted :token origin} {:type :splat :data splatted :token origin}
) )
(try (nonbinding parser) ([e] e)))) (capture nonbinding parser)))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (array/push (ast :data) e)))) ([e] (array/push (ast :data) e))))
@ -400,11 +406,11 @@
(error err)) (error err))
(def origin (current parser)) (def origin (current parser))
(def term (case (type origin) (def term (case (type origin)
:splat {:type :splat :data (try (word (advance parser)) ([e] e)) :token origin} :splat {:type :splat :data (capture word-only (advance parser)) :token origin}
:word (try (word parser) ([e] e)) :word (try (word-only parser) ([e] e))
:keyword (do :keyword (do
(def key (try (kw parser) ([e] e))) (def key (try (kw parser) ([e] e)))
(def value (try (nonbinding parser) ([e] e))) (def value (capture nonbinding parser))
{:type :pair :data [key value] :token origin}) {:type :pair :data [key value] :token origin})
(try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e)) (try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e))
)) ))
@ -448,9 +454,9 @@
(def term (if (check parser :splat) (def term (if (check parser :splat)
(do (do
(advance parser) (advance parser)
(def splatted (if (check parser :word) (word parser) nil)) (def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin}) {:type :splat :data splatted :token origin})
(try (pattern parser) ([e] e)))) (capture pattern parser)))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (pp e) (array/push (ast :data) e)))) ([e] (pp e) (array/push (ast :data) e))))
@ -471,9 +477,9 @@
(def term (if (check parser :splat) (def term (if (check parser :splat)
(do (do
(advance parser) (advance parser)
(def splatted (if (check parser :word) (word parser) nil)) (def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin}) {:type :splat :data splatted :token origin})
(try (pattern parser) ([e] e)))) (capture pattern parser)))
(array/push (ast :data) term) (array/push (ast :data) term)
(try (separators parser) (try (separators parser)
([e] (array/push (ast :data) e)))) ([e] (array/push (ast :data) e))))
@ -492,8 +498,8 @@
(error err)) (error err))
(def origin (current parser)) (def origin (current parser))
(def term (case (type origin) (def term (case (type origin)
:splat {:type :splat :data (try (word (advance parser)) ([_] nil)) :token origin} :splat {:type :splat :data (when (check (advance parser) :word) (word-only parser)) :token origin}
:word (try (word parser) ([e] e)) :word (capture word-pattern parser)
:keyword (do :keyword (do
(def key (capture kw parser)) (def key (capture kw parser))
(def value (capture pattern parser)) (def value (capture pattern parser))
@ -658,39 +664,49 @@
(expect parser :with) (advance parser) (expect parser :with) (advance parser)
(try (try
(do (do
(expect parser :lbrace) (advance parser) (expect parser :lbrace) (var lbrace (current parser)) (advance parser)
(accept-many parser ;terminators) (accept-many parser ;terminators)
(def data @[])
(def clauses @[]) (def clauses @[])
(array/push clauses (with-clause parser)) (array/push clauses (with-clause parser))
(accept-many parser ;terminators) (accept-many parser ;terminators)
(while (not (check parser :rbrace)) (while (not (check parser :rbrace))
(if (check parser :eof) (if (check parser :eof)
(error {:type :error :data data :token origin :msg "unclosed brace"})) (error {:type :error :data [clauses] :token lbrace :msg "unclosed brace"}))
(array/push clauses (with-clause parser)) (array/push clauses (with-clause parser))
(accept-many parser ;terminators)) (accept-many parser ;terminators))
(array/push data clauses) (advance parser) # consume closing brace
(accept-many parser :newline) (accept-many parser :newline)
(expect parser :then) (advance parser) (expect parser :then) (advance parser)
(array/push data (nonbinding parser)) (def then (nonbinding parser))
(accept-many parser :newline) (accept-many parser :newline)
(expect parser :else) (advance parser) (expect parser :else) (advance parser)
(array/push data (nonbinding parser)) (expect parser :lbrace) (set lbrace (current parser)) (advance parser)
{:type :with :data data :token origin}) (accept-many parser ;terminators)
(def else @[])
(while (not (check parser :rbrace))
(when (check parser :eof) (error {:type :error :token lbrace :data [else] :msg "unclosed brace"}))
(array/push else (match-clause parser)))
(advance parser)
{:type :with :data [clauses then else] :token origin})
([err] err) ([err] err)
) )
) )
### function forms ### function forms
(defn- fn-simple [parser] (defn- fn-simple [parser]
(print "parsing simple function body")
(try (try
(do (do
(def lhs (tup-pattern parser)) (def lhs (tup-pattern parser))
(print "parsed lhs")
(def guard (when (check parser :if) (def guard (when (check parser :if)
(advance parser) (advance parser)
(simple parser))) (simple parser)))
(print "parsed guard")
(expect parser :arrow) (advance parser) (expect parser :arrow) (advance parser)
(print "parsed arrow")
(def rhs (nonbinding parser)) (def rhs (nonbinding parser))
(print "parsed rhs")
[[lhs guard rhs]] [[lhs guard rhs]]
) )
([err] err) ([err] err)
@ -718,6 +734,7 @@
) )
(defn- fn-clauses [parser] (defn- fn-clauses [parser]
(print "parsing fn clauses")
(def origin (current parser)) (def origin (current parser))
(expect parser :lbrace) (advance parser) (expect parser :lbrace) (advance parser)
(accept-many parser ;terminators) (accept-many parser ;terminators)
@ -728,25 +745,31 @@
(array/push data (capture fn-clause parser))) (array/push data (capture fn-clause parser)))
data) data)
(defn- fnn [parser]
(try
(do
(def origin (current parser))
(expect parser :fn) (advance parser)
(def name (word parser))
(def data (case (-> parser current type)
:lbrace (fn-clauses parser)
:lparen (fn-simple parser)
(panic parser (string "expected clause or clauses, got " (-> current parser type)))))
{:type :fn :name name :data data :token origin}
)
([err] err)))
(defn- lambda [parser] (defn- lambda [parser]
(def origin (current parser)) (def origin (current parser))
(expect parser :fn) (advance parser) (expect parser :fn) (advance parser)
{:type :fn :data (fn-simple parser) :token origin}) {:type :fn :data (fn-simple parser) :token origin})
(defn- fnn
  "Parse a function form starting at an `fn` token.
  When the token after `fn` is a left paren the form has no name, so parsing is
  delegated to `lambda`. Otherwise `fn` is consumed, the name is read with
  `word-only` (only its :data is kept), and the body is parsed with `fn-clauses`
  when the next token is :lbrace or `fn-simple` when it is :lparen.
  Returns {:type :fn :name name :data data :token origin}. Any error raised
  while parsing the named form is caught and returned as the value."
  [parser]
  # anonymous function: `fn (` ... -- hand off before consuming anything
  (if (= :lparen (-> parser peek type)) (break (lambda parser)))
  (try
    (do
      (print "parsing named function")
      (def origin (current parser))
      (expect parser :fn) (advance parser)
      (print "consumed `fn`")
      (print "next token: ")
      (pp (current parser))
      (def name (-> parser word-only (get :data)))
      (print "function name: ")
      (pp name)
      (def data (case (-> parser current type)
        :lbrace (fn-clauses parser)
        :lparen (fn-simple parser)
        # thread the parser through `current`, then `type`; the previous
        # `(-> current parser type)` indexed the parser with the `current`
        # function instead of reporting the offending token's type
        (panic parser (string "expected clause or clauses, got " (-> parser current type)))))
      {:type :fn :name name :data data :token origin})
    ([err] err)))
### compound forms ### compound forms
(defn- block [parser] (defn- block [parser]
(def origin (current parser)) (def origin (current parser))
@ -770,7 +793,10 @@
(expect parser :do) (advance parser) (expect parser :do) (advance parser)
(def data @[]) (def data @[])
(array/push data (capture simple parser)) (array/push data (capture simple parser))
(print "added first expression. current token:")
(pp (current parser))
(while (check parser :pipeline) (while (check parser :pipeline)
(advance parser)
(accept-many parser :newline) (accept-many parser :newline)
(array/push data (capture simple parser))) (array/push data (capture simple parser)))
{:type :do :data data :token origin}) {:type :do :data data :token origin})
@ -781,7 +807,7 @@
(expect parser :ref) (advance parser) (expect parser :ref) (advance parser)
(try (try
(do (do
(def name (word parser)) (def name (-> parser word-only (get :data)))
(expect parser :equals) (advance parser) (expect parser :equals) (advance parser)
(def value (nonbinding parser)) (def value (nonbinding parser))
{:type :ref :data value :name name :token origin}) {:type :ref :data value :name name :token origin})
@ -807,7 +833,7 @@
(do (do
(def origin (current parser)) (def origin (current parser))
(expect parser :pkg) (advance parser) (expect parser :pkg) (advance parser)
(def name (pkg-name parser)) (def name (-> parser pkg-name (get :data)))
(expect parser :lbrace) (advance parser) (expect parser :lbrace) (advance parser)
(accept-many parser ;terminators) (accept-many parser ;terminators)
(def data @[]) (def data @[])
@ -822,7 +848,7 @@
(def key (capture kw parser)) (def key (capture kw parser))
(def value (capture simple parser)) (def value (capture simple parser))
(array/push data {:type :pair :data [key value] :token origin})) (array/push data {:type :pair :data [key value] :token origin}))
:word (array/push (capture word parser)) :word (array/push (capture word-only parser))
(panic parser "expected dict term")) (panic parser "expected dict term"))
(terminator parser)) (terminator parser))
{:type :pkg :data data :token origin :name name}) {:type :pkg :data data :token origin :name name})
@ -833,7 +859,7 @@
(do (do
(def origin (current parser)) (def origin (current parser))
(expect parser :ns) (advance parser) (expect parser :ns) (advance parser)
(def name (pkg-name parser)) (def name (-> parser pkg-name (get :data)))
(def body (block parser)) (def body (block parser))
{:type :ns :data body :name name :token origin}) {:type :ns :data body :name name :token origin})
([err] err))) ([err] err)))
@ -843,7 +869,9 @@
(expect parser :import) (advance parser) (expect parser :import) (advance parser)
(def path (str parser)) (def path (str parser))
(expect parser :as) (advance parser) (expect parser :as) (advance parser)
(def name (pkg-name parser)) (def name-parser (if (check parser :pkg-name) pkg-name word-only))
(def name
(-> parser name-parser (get :data)))
{:type :import :data path :name name :token origin}) {:type :import :data path :name name :token origin})
### tests ### tests
@ -874,14 +902,21 @@
(defn- repeatt [parser] (defn- repeatt [parser]
(def origin (current parser)) (def origin (current parser))
(advance parser)
(def times (case (-> parser current type) (def times (case (-> parser current type)
:number (num parser) :number (num parser)
:word (word parser) :word (word-only parser)
(panic parser "expected number or word") (panic parser "expected number or word")
)) ))
(def body (block parser)) (def body (block parser))
{:type :repeat :data [times body] :token origin}) {:type :repeat :data [times body] :token origin})
### panics
(defn- panicc
  "Parse a panic expression: a :panic token followed by one nonbinding expression.
  The :panic token is checked with `expect` and consumed, then the operand is
  parsed with `nonbinding`. Returns {:type :panic :data <operand> :token <panic token>}.
  Errors from `expect` or `nonbinding` are not caught here."
  [parser]
  (let [start (current parser)]
    (expect parser :panic)
    (advance parser)
    {:type :panic
     :data (nonbinding parser)
     :token start}))
### expressions ### expressions
# four levels of expression complexity: # four levels of expression complexity:
# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks) # simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks)
@ -904,9 +939,10 @@
:lbracket (list parser) :lbracket (list parser)
:startdict (dict parser) :startdict (dict parser)
:startset (sett parser) :startset (sett parser)
:word (word parser) :word (word-expr parser)
:pkg-name (pkg-name parser) :pkg-name (pkg-name parser)
:recur (recur parser) :recur (recur parser)
:panic (panicc parser)
(panic parser (string expect "expected simple expression, got " (type curr))) (panic parser (string expect "expected simple expression, got " (type curr)))
) )
) )
@ -936,7 +972,7 @@
:startset (sett parser) :startset (sett parser)
# synthetic # synthetic
:word (word parser) :word (word-expr parser)
:pkg-name (pkg-name parser) :pkg-name (pkg-name parser)
:recur (recur parser) :recur (recur parser)
@ -955,6 +991,13 @@
# blocks # blocks
:lbrace (block parser) :lbrace (block parser)
# looping forms
:loop (loopp parser)
:repeat (repeatt parser)
# panic!
:panic (panicc parser)
(panic parser (string "expected nonbinding expression, got " (type curr))) (panic parser (string "expected nonbinding expression, got " (type curr)))
) )
) )
@ -978,7 +1021,7 @@
:lbracket (list parser) :lbracket (list parser)
:startdict (dict parser) :startdict (dict parser)
:startset (sett parser) :startset (sett parser)
:word (word parser) :word (word-expr parser)
:pkg-name (pkg-name parser) :pkg-name (pkg-name parser)
:recur (recur parser) :recur (recur parser)
:if (iff parser) :if (iff parser)
@ -987,6 +1030,9 @@
:with (withh parser) :with (withh parser)
:do (doo parser) :do (doo parser)
:lbrace (block parser) :lbrace (block parser)
:loop (loopp parser)
:repeat (repeatt parser)
:panic (panicc parser)
(panic parser (string "expected expression, got " (type curr))) (panic parser (string "expected expression, got " (type curr)))
) )
) )
@ -1035,13 +1081,14 @@
(do (do
#(comment #(comment
(def source ` (def source `
fn foo (x) -> :foo panic! foo
`) `)
(def scanned (s/scan source)) (def scanned (s/scan source))
(def a-parser (new-parser scanned)) (def a-parser (new-parser scanned))
(def parsed (script a-parser))
(print "\n***NEW PARSE***\n") (print "\n***NEW PARSE***\n")
(print (pp-ast parsed)) (def parsed (toplevel a-parser))
# (print (pp-ast parsed))
(pp parsed)
) )
@ -1049,4 +1096,3 @@ fn foo (x) -> :foo
# TODO: # TODO:
# DECIDE: # DECIDE:
# - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors # - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors