complete draft of parsing

This commit is contained in:
Scott Richmond 2024-05-11 23:25:36 -04:00
parent 806ec0e8f0
commit 2cfe9fdffc

View File

@ -259,13 +259,18 @@
# Token types that continue a synthetic expression after its root term:
# `word-expr` peeks for one of these to decide whether to hand off to
# `synthetic`, and `synthetic` keeps collecting terms while the current
# token's type is in this list.
(def sequels [:lparen :keyword])
(defn- word [parser]
(defn- word-expr [parser]
(expect parser :word)
(if (has-value? sequels (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current))
(advance parser)
{:type :word :data (curr :lexeme) :token curr}
)
{:type :word :data (curr :lexeme) :token curr})
# Parses a single word token and nothing more: unlike `word-expr`, it does not
# peek ahead to see whether the word starts a synthetic expression.
# Calls `expect` for :word (its failure behaviour is defined elsewhere in the
# file), consumes the token, and returns a :word node whose :data is the
# token's lexeme and whose :token is the token itself.
(defn- word-only [parser]
  (expect parser :word)
  (let [tok (current parser)]
    (advance parser)
    {:type :word :data (tok :lexeme) :token tok}))
(defn- args [parser]
(def origin (current parser))
@ -289,7 +294,7 @@
(set (ast :partial) true)
(advance parser)
{:type :placeholder :token origin}))
(try (nonbinding parser) ([e] e))))
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
@ -297,20 +302,21 @@
ast)
# Parses the root term of a synthetic expression: a word, keyword, or package
# name. The current token is consumed unconditionally before dispatching on its
# type; any other token type is reported through `panic`.
# Returns a node of the matching :type carrying the token in :token.
(defn- synth-root [parser]
  (print "parsing synth root")
  (def origin (current parser))
  (advance parser)
  (case (type origin)
    :word {:type :word :data (origin :lexeme) :token origin}
    :keyword {:type :keyword :data (origin :literal) :token origin}
    # Exactly one :pkg-name branch, reading :lexeme. `case` takes the first
    # matching key, so a duplicate branch reading the misspelled :lexem key
    # would win and leave :data nil.
    :pkg-name {:type :pkg-name :data (origin :lexeme) :token origin}
    (panic parser "expected word, keyword, or package")))
(defrec synthetic [parser]
(print "parsing synthetic")
(def origin (current parser))
(def ast {:type :synthetic :data @[(synth-root origin)] :token origin})
(advance parser)
(def ast {:type :synthetic :data @[(synth-root parser)] :token origin})
(while (has-value? sequels (-> parser current type))
(def term
(case (-> parser current type)
@ -333,7 +339,7 @@
(def err {:type :error :token origin :msg "unclosed paren"})
(array/push (parser :errors) err)
(error err))
(def term (try (nonbinding parser) ([e] e)))
(def term (capture nonbinding parser))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
@ -354,10 +360,10 @@
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (try (word parser) ([e] e)))
(def splatted (try (word-only parser) ([e] e)))
{:type :splat :data splatted :token origin}
)
(try (nonbinding parser) ([e] e))))
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
@ -378,10 +384,10 @@
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (capture word parser))
(def splatted (capture word-only parser))
{:type :splat :data splatted :token origin}
)
(try (nonbinding parser) ([e] e))))
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
@ -400,11 +406,11 @@
(error err))
(def origin (current parser))
(def term (case (type origin)
:splat {:type :splat :data (try (word (advance parser)) ([e] e)) :token origin}
:word (try (word parser) ([e] e))
:splat {:type :splat :data (capture word-only (advance parser)) :token origin}
:word (try (word-only parser) ([e] e))
:keyword (do
(def key (try (kw parser) ([e] e)))
(def value (try (nonbinding parser) ([e] e)))
(def value (capture nonbinding parser))
{:type :pair :data [key value] :token origin})
(try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e))
))
@ -448,9 +454,9 @@
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (if (check parser :word) (word parser) nil))
(def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin})
(try (pattern parser) ([e] e))))
(capture pattern parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
@ -471,9 +477,9 @@
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (if (check parser :word) (word parser) nil))
(def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin})
(try (pattern parser) ([e] e))))
(capture pattern parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
@ -492,8 +498,8 @@
(error err))
(def origin (current parser))
(def term (case (type origin)
:splat {:type :splat :data (try (word (advance parser)) ([_] nil)) :token origin}
:word (try (word parser) ([e] e))
:splat {:type :splat :data (when (check (advance parser) :word) (word-only parser)) :token origin}
:word (capture word-pattern parser)
:keyword (do
(def key (capture kw parser))
(def value (capture pattern parser))
@ -658,39 +664,49 @@
(expect parser :with) (advance parser)
(try
(do
(expect parser :lbrace) (advance parser)
(expect parser :lbrace) (var lbrace (current parser)) (advance parser)
(accept-many parser ;terminators)
(def data @[])
(def clauses @[])
(array/push clauses (with-clause parser))
(accept-many parser ;terminators)
(while (not (check parser :rbrace))
(if (check parser :eof)
(error {:type :error :data data :token origin :msg "unclosed brace"}))
(error {:type :error :data [clauses] :token lbrace :msg "unclosed brace"}))
(array/push clauses (with-clause parser))
(accept-many parser ;terminators))
(array/push data clauses)
(advance parser) # consume closing brace
(accept-many parser :newline)
(expect parser :then) (advance parser)
(array/push data (nonbinding parser))
(def then (nonbinding parser))
(accept-many parser :newline)
(expect parser :else) (advance parser)
(array/push data (nonbinding parser))
{:type :with :data data :token origin})
(expect parser :lbrace) (set lbrace (current parser)) (advance parser)
(accept-many parser ;terminators)
(def else @[])
(while (not (check parser :rbrace))
(when (check parser :eof) (error {:type :error :token lbrace :data [else] :msg "unclosed brace"}))
(array/push else (match-clause parser)))
(advance parser)
{:type :with :data [clauses then else] :token origin})
([err] err)
)
)
### function forms
(defn- fn-simple [parser]
(print "parsing simple function body")
(try
(do
(def lhs (tup-pattern parser))
(print "parsed lhs")
(def guard (when (check parser :if)
(advance parser)
(simple parser)))
(print "parsed guard")
(expect parser :arrow) (advance parser)
(print "parsed arrow")
(def rhs (nonbinding parser))
(print "parsed rhs")
[[lhs guard rhs]]
)
([err] err)
@ -718,6 +734,7 @@
)
(defn- fn-clauses [parser]
(print "parsing fn clauses")
(def origin (current parser))
(expect parser :lbrace) (advance parser)
(accept-many parser ;terminators)
@ -728,25 +745,31 @@
(array/push data (capture fn-clause parser)))
data)
# Parses a named function: `fn`, a name parsed by `word`, then either a braced
# list of clauses (`fn-clauses`) or a single parenthesised clause (`fn-simple`).
# Returns a :fn node with :name, :data and :token. Any error raised while
# parsing is caught and returned as the result instead of propagating.
(defn- fnn [parser]
  (try
    (do
      (def origin (current parser))
      (expect parser :fn) (advance parser)
      (def name (word parser))
      (def data (case (-> parser current type)
        :lbrace (fn-clauses parser)
        :lparen (fn-simple parser)
        # Thread `parser` first: (-> current parser type) would evaluate
        # (type (parser current)), indexing the parser with the function
        # `current` rather than asking for the current token.
        (panic parser (string "expected clause or clauses, got " (-> parser current type)))))
      {:type :fn :name name :data data :token origin})
    ([err] err)))
# Parses an anonymous function: `fn` followed directly by a single clause.
# Calls `expect` for :fn, consumes that token, and delegates the clause to
# `fn-simple`. Returns a :fn node without a :name; :token is the `fn` token.
(defn- lambda [parser]
  (let [fn-token (current parser)]
    (expect parser :fn)
    (advance parser)
    (def clauses (fn-simple parser))
    {:type :fn :data clauses :token fn-token}))
# Parses a `fn` form.
# If the token returned by `peek` is :lparen (presumably the token right after
# `fn` -- confirm against `peek`), the form has no name and is handed to
# `lambda`. Otherwise: expect `fn`, read the name with `word-only` (keeping only
# its :data), then parse either a braced clause list (`fn-clauses`) or a single
# parenthesised clause (`fn-simple`).
# Returns a :fn node with :name, :data and :token. Any error raised inside the
# `try` is caught and returned as the result instead of propagating.
(defn- fnn [parser]
  (if (= :lparen (-> parser peek type)) (break (lambda parser)))
  (try
    (do
      (print "parsing named function")
      (def origin (current parser))
      (expect parser :fn) (advance parser)
      (print "consumed `fn`")
      (print "next token: ")
      (pp (current parser))
      (def name (-> parser word-only (get :data)))
      (print "function name: ")
      (pp name)
      (def data (case (-> parser current type)
        :lbrace (fn-clauses parser)
        :lparen (fn-simple parser)
        # Thread `parser` first: (-> current parser type) would evaluate
        # (type (parser current)), indexing the parser with the function
        # `current` rather than asking for the current token.
        (panic parser (string "expected clause or clauses, got " (-> parser current type)))))
      {:type :fn :name name :data data :token origin})
    ([err] err)))
### compound forms
(defn- block [parser]
(def origin (current parser))
@ -770,7 +793,10 @@
(expect parser :do) (advance parser)
(def data @[])
(array/push data (capture simple parser))
(print "added first expression. current token:")
(pp (current parser))
(while (check parser :pipeline)
(advance parser)
(accept-many parser :newline)
(array/push data (capture simple parser)))
{:type :do :data data :token origin})
@ -781,7 +807,7 @@
(expect parser :ref) (advance parser)
(try
(do
(def name (word parser))
(def name (-> parser word-only (get :data)))
(expect parser :equals) (advance parser)
(def value (nonbinding parser))
{:type :ref :data value :name name :token origin})
@ -807,7 +833,7 @@
(do
(def origin (current parser))
(expect parser :pkg) (advance parser)
(def name (pkg-name parser))
(def name (-> parser pkg-name (get :data)))
(expect parser :lbrace) (advance parser)
(accept-many parser ;terminators)
(def data @[])
@ -822,7 +848,7 @@
(def key (capture kw parser))
(def value (capture simple parser))
(array/push data {:type :pair :data [key value] :token origin}))
:word (array/push (capture word parser))
:word (array/push (capture word-only parser))
(panic parser "expected dict term"))
(terminator parser))
{:type :pkg :data data :token origin :name name})
@ -833,7 +859,7 @@
(do
(def origin (current parser))
(expect parser :ns) (advance parser)
(def name (pkg-name parser))
(def name (-> parser pkg-name (get :data)))
(def body (block parser))
{:type :ns :data body :name name :token origin})
([err] err)))
@ -843,7 +869,9 @@
(expect parser :import) (advance parser)
(def path (str parser))
(expect parser :as) (advance parser)
(def name (pkg-name parser))
(def name-parser (if (check parser :pkg-name) pkg-name word-only))
(def name
(-> parser name-parser (get :data)))
{:type :import :data path :name name :token origin})
### tests
@ -874,14 +902,21 @@
# Parses a `repeat` form: the leading token is consumed without a check, then a
# count that is either a number (`num`) or a bare word (`word-only`), then a
# block body. Any other count token is reported through `panic`.
# Returns a :repeat node whose :data is [times body].
(defn- repeatt [parser]
  (def origin (current parser))
  (advance parser)
  (def times (case (-> parser current type)
    :number (num parser)
    # Exactly one :word branch. `case` takes the first matching key, so a
    # duplicate `(word parser)` branch placed ahead of this one would make
    # `word-only` unreachable.
    :word (word-only parser)
    (panic parser "expected number or word")))
  (def body (block parser))
  {:type :repeat :data [times body] :token origin})
### panics
# Parses a `panic` form: calls `expect` for :panic, consumes that token, and
# parses the following nonbinding expression as the payload.
# Returns a :panic node with the payload in :data and the `panic` token in
# :token.
(defn- panicc [parser]
  (let [panic-token (current parser)]
    (expect parser :panic)
    (advance parser)
    (def payload (nonbinding parser))
    {:type :panic :data payload :token panic-token}))
### expressions
# four levels of expression complexity:
# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks)
@ -904,9 +939,10 @@
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word parser)
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
:panic (panicc parser)
(panic parser (string expect "expected simple expression, got " (type curr)))
)
)
@ -936,7 +972,7 @@
:startset (sett parser)
# synthetic
:word (word parser)
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
@ -955,6 +991,13 @@
# blocks
:lbrace (block parser)
# looping forms
:loop (loopp parser)
:repeat (repeatt parser)
# panic!
:panic (panicc parser)
(panic parser (string "expected nonbinding expression, got " (type curr)))
)
)
@ -978,7 +1021,7 @@
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word parser)
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
:if (iff parser)
@ -987,6 +1030,9 @@
:with (withh parser)
:do (doo parser)
:lbrace (block parser)
:loop (loopp parser)
:repeat (repeatt parser)
:panic (panicc parser)
(panic parser (string "expected expression, got " (type curr)))
)
)
@ -1035,13 +1081,14 @@
(do
#(comment
(def source `
fn foo (x) -> :foo
panic! foo
`)
(def scanned (s/scan source))
(def a-parser (new-parser scanned))
(def parsed (script a-parser))
(print "\n***NEW PARSE***\n")
(print (pp-ast parsed))
(def parsed (toplevel a-parser))
# (print (pp-ast parsed))
(pp parsed)
)
@ -1049,4 +1096,3 @@ fn foo (x) -> :foo
# TODO:
# DECIDE:
# - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors