Add splats to list patterns in parser

This commit is contained in:
Scott Richmond 2022-06-19 14:48:53 -04:00
parent 4254359934
commit 68bc37ef61

View File

@ -1,10 +1,10 @@
(ns ludus.parser
(:require
[ludus.token :as token]
[ludus.scanner :as scanner]
[ludus.ast :as ast]
[clojure.pprint :as pp]
[clojure.set :as s]))
[ludus.token :as token]
[ludus.scanner :as scanner]
[ludus.ast :as ast]
[clojure.pprint :as pp]
[clojure.set :as s]))
;; a parser map and some functions to work with them
(defn- parser [tokens]
@ -47,8 +47,8 @@
:origin origin
:end end}]
(-> parser
(assoc ::ast poison)
(update ::errors conj poison))))
(assoc ::ast poison)
(update ::errors conj poison))))
(defn- poisoned? [parser]
(= ::ast/poison (get-in parser [::ast ::ast/type])))
@ -74,8 +74,8 @@
(if (contains? tokens type)
(advance parser)
(-> parser
(advance)
(panic message tokens)))))
(advance)
(panic message tokens)))))
(defn- expect* [tokens message parser]
(let [curr (current parser)
@ -106,10 +106,10 @@
(defn- parse-atom [parser]
(let [token (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (::token/literal token)}))))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (::token/literal token)}))))
;; just a quick and dirty map to associate atomic words with values
(def atomic-words {::token/nil nil
@ -119,10 +119,10 @@
(defn parse-atomic-word [parser]
(let [token (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (get atomic-words (::token/type token))}))))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (get atomic-words (::token/type token))}))))
(defn- add-member [members member]
(if (nil? member)
@ -140,16 +140,16 @@
(case (token-type parser)
::token/rparen (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:length (count ms)
:members ms
:token (current origin)
:partial (contains-placeholder? ms)}))
{::ast/type ::ast/tuple
:length (count ms)
:members ms
:token (current origin)
:partial (contains-placeholder? ms)}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
@ -157,11 +157,11 @@
::token/placeholder
(if (contains-placeholder? members)
(recur
(advance parser)
members
(panic parser "Partially applied functions must be unary. (Only one placeholder allowed in partial application.)" curr))
(advance parser)
members
(panic parser "Partially applied functions must be unary. (Only one placeholder allowed in partial application.)" curr))
(recur
(advance parser) members {::ast/type ::ast/placeholder :token curr}))
(advance parser) members {::ast/type ::ast/placeholder :token curr}))
::token/eof
(panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof)
@ -177,24 +177,24 @@
(case (token-type parser)
::token/rparen (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
::token/placeholder
(recur
(advance parser)
members
(panic parser "Placeholders in tuples may only be in function calls." curr))
(advance parser)
members
(panic parser "Placeholders in tuples may only be in function calls." curr))
::token/eof
(panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof)
@ -202,7 +202,7 @@
(if current_member
(panic parser "Comma expected between tuple members")
(let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rparen})]
(recur parsed members (::ast parsed))))))))
(recur parsed members (::ast parsed))))))))
(defn- parse-list [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
@ -212,14 +212,14 @@
(case (token-type parser)
::token/rbracket (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/list
:token (current origin)
:members ms}))
{::ast/type ::ast/list
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rparen)
(panic parser (str "Mismatched enclosure in list: " (::token/lexeme curr)))
@ -238,7 +238,7 @@
(if current_member
(panic parser "Comma expected between list members")
(let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rbracket})]
(recur parsed members (::ast parsed))))))))
(recur parsed members (::ast parsed))))))))
(defn- parse-set [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
@ -248,14 +248,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/set
:token (current origin)
:members ms}))
{::ast/type ::ast/set
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in set: " (::token/lexeme curr)))
@ -274,7 +274,7 @@
(if current_member
(panic parser "Comma expected between set members")
(let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rbrace})]
(recur parsed members (::ast parsed))))))))
(recur parsed members (::ast parsed))))))))
(defn- parse-dict [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
@ -284,14 +284,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in dict: " (::token/lexeme curr)))
@ -331,14 +331,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in struct: " (::token/lexeme curr)))
@ -349,12 +349,12 @@
::token/word
(if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])]
(recur parsed members {(keyword word) (::ast parsed)}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
::token/keyword
(if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})]
(recur expr members {(:value (::ast kw)) (::ast expr)}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs" #{::token/rbrace})))))
@ -374,15 +374,15 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/ns
:token (current ns-root)
:name (get-in (parse-word (advance ns-root)) [::ast :word])
:members ms}))
{::ast/type ::ast/ns
:token (current ns-root)
:name (get-in (parse-word (advance ns-root)) [::ast :word])
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in ns: " (::token/lexeme curr)))
@ -393,12 +393,12 @@
::token/word
(if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])]
(recur parsed members {(keyword word) (::ast parsed)}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
::token/keyword
(if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})]
(recur expr members {(:value (::ast kw)) (::ast expr)}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs" #{::token/rbrace})))))))
@ -418,8 +418,8 @@
(::token/semicolon ::token/newline)
(recur
(accept-many #{::token/newline ::token/semicolon} parser)
(add-member exprs current_expr) nil)
(accept-many #{::token/newline ::token/semicolon} parser)
(add-member exprs current_expr) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in block: " (::token/lexeme curr)))
@ -447,9 +447,9 @@
(::token/semicolon ::token/newline)
(recur
(accept-many #{::token/semicolon ::token/newline} parser)
(add-member exprs current_expr)
nil)
(accept-many #{::token/semicolon ::token/newline} parser)
(add-member exprs current_expr)
nil)
(let [parsed
(if current_expr
@ -479,27 +479,55 @@
(defn- parse-word [parser]
(let [curr (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/word :token (current parser) :word (::token/lexeme curr)}))))
(advance)
(assoc ::ast {::ast/type ::ast/word :token (current parser) :word (::token/lexeme curr)}))))
(def sync-pattern (s/union sync-on #{::token/equals ::token/rarrow}))
(defn- parse-splat-pattern [origin]
(let [splatted (advance origin)]
(case (token-type splatted)
::token/word
(assoc (advance splatted) ::ast
{::ast/type ::ast/splat
:token (current origin)
:into (::ast (parse-word splatted))})
::token/placeholder
(assoc (advance splatted) ::ast
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current splatted)}})
(::token/comma ::token/newline ::token/rbrace ::token/rparen ::token/rbracket)
(assoc splatted ::ast
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current origin)}})
(panic origin "Splat patterns may only splat into words or placeholders.")
)))
(defn- parse-list-pattern [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
members []
current_member nil]
(let [curr (current parser)]
(case (token-type parser)
::token/rbracket (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/list
:token (current origin)
:members ms}))
::token/rbracket
(let [ms (add-member members current_member)]
(if (not-any? #(= (::ast/type %) ::ast/splat) (drop-last ms))
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms})
(panic parser "A splat my only appear once in a pattern, at the end of a pattern.")))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rparen)
(panic parser (str "Mismatched enclosure in list pattern: " (::token/lexeme curr)))
@ -507,6 +535,10 @@
::token/eof
(panic (assoc origin ::errors (::errors parser)) "Unterminated list pattern" ::token/eof)
::token/splat
(let [splatted (parse-splat-pattern parser)]
(recur splatted members (::ast splatted)))
(let [parsed (parse-pattern parser)]
(recur parsed members (::ast parsed)))))))
@ -518,14 +550,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in dict pattern: " (::token/lexeme curr)))
@ -555,14 +587,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in struct pattern: " (::token/lexeme curr)))
@ -584,23 +616,26 @@
(panic parser "Struct patterns may only include single words or keyword+pattern pairs." #{::token/rbrace})))))
(defn- parse-tuple-pattern [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
members []
current_member nil]
(let [curr (current parser)]
(case (token-type parser)
::token/rparen (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
::token/rparen
(let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
@ -617,8 +652,8 @@
(case type
(::token/placeholder ::token/ignored)
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/placeholder :token curr}))
(advance)
(assoc ::ast {::ast/type ::ast/placeholder :token curr}))
::token/word (parse-word parser)
@ -812,12 +847,12 @@
(defn- parse-cond-clause [parser]
(let [expr (if
(contains? #{::token/else ::token/placeholder} (token-type parser))
(contains? #{::token/else ::token/placeholder} (token-type parser))
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token (current parser)
:value true}))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token (current parser)
:value true}))
(parse-expr parser))
rarrow (expect* #{::token/rarrow} "Expected arrow after expression in cond clause" expr)]
(if (:success rarrow)
@ -963,7 +998,7 @@
(defn- parse-send [parser]
(let [msg (parse-expr (advance parser))
to (expect* ::token/to "Expected `to` between message and PID" msg)]
to (expect* ::token/to "Expected `to` between message and PID" msg)]
(if (:success to)
(let [pid (parse-expr (:parser to))]
(assoc pid ::ast {::ast/type ::ast/send :token (current parser) :msg (::ast msg) :pid (::ast pid)}))
@ -1066,14 +1101,13 @@
(defn parse [lexed]
(-> lexed
(:tokens)
(parser)
(parse-script)))
(:tokens)
(parser)
(parse-script)))
(comment
(do
(def pp pp/pprint)
(def source "if let foo = bar then foo else bar")
(def source "let [a, b, (c, d), ...e] = [1, 2, (4, 5), 6]")
(println "")
(println "")