Make (_) a parsing error

This commit is contained in:
Scott Richmond 2022-11-03 17:51:33 -04:00
parent e248be16cf
commit 1caddfacbc

View File

@ -1,10 +1,10 @@
(ns ludus.parser
(:require
[ludus.token :as token]
[ludus.scanner :as scanner]
[ludus.ast :as ast]
[clojure.pprint :as pp]
[clojure.set :as s]))
[ludus.token :as token]
[ludus.scanner :as scanner]
[ludus.ast :as ast]
[clojure.pprint :as pp]
[clojure.set :as s]))
;; a parser map and some functions to work with them
(defn- parser [tokens]
@ -47,8 +47,8 @@
:origin origin
:end end}]
(-> parser
(assoc ::ast poison)
(update ::errors conj poison))))
(assoc ::ast poison)
(update ::errors conj poison))))
(defn- poisoned? [parser]
(= ::ast/poison (get-in parser [::ast ::ast/type])))
@ -74,8 +74,8 @@
(if (contains? tokens type)
(advance parser)
(-> parser
(advance)
(panic message tokens)))))
(advance)
(panic message tokens)))))
(defn- expect* [tokens message parser]
(let [curr (current parser)
@ -106,10 +106,10 @@
(defn- parse-atom [parser]
(let [token (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (::token/literal token)}))))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (::token/literal token)}))))
;; just a quick and dirty map to associate atomic words with values
(def atomic-words {::token/nil nil
@ -119,17 +119,17 @@
(defn parse-atomic-word [parser]
(let [token (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (get atomic-words (::token/type token))}))))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token token
:value (get atomic-words (::token/type token))}))))
(defn- parse-datatype [parser]
(let [token (current parser)]
(-> parser (advance)
(assoc ::ast {::ast/type ::ast/datatype
:token token
:value (::token/literal token)}))))
(assoc ::ast {::ast/type ::ast/datatype
:token token
:value (::token/literal token)}))))
(defn- add-member [members member]
(if (nil? member)
@ -139,24 +139,29 @@
(defn- contains-placeholder? [members]
(< 0 (count (filter #(= ::ast/placeholder (::ast/type %1)) members))))
(defn unary-placeholder? [tuple]
(and (:partial tuple) (= (:length tuple) 1)))
(defn- parse-fn-tuple [origin]
(loop [parser (accept-many #{::token/newline ::token/comma} (advance origin))
members []
current_member nil]
(let [curr (current parser)]
(case (token-type parser)
::token/rparen (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:length (count ms)
:members ms
:token (current origin)
:partial (contains-placeholder? ms)}))
::token/rparen (let [ms (add-member members current_member)
ast {::ast/type ::ast/tuple
:length (count ms)
:members ms
:token (current origin)
:partial (contains-placeholder? ms)}]
(if (unary-placeholder? ast)
(panic parser "You may not use a placeholder in a tuple of length 1. You may only partially apply functions that take more than one argument.")
(assoc (advance parser) ::ast ast)))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
@ -164,11 +169,11 @@
::token/placeholder
(if (contains-placeholder? members)
(recur
(advance parser)
members
(panic parser "Partially applied functions must be unary. (Only one placeholder allowed in partial application.)" curr))
(advance parser)
members
(panic parser "Partially applied functions must be unary. (Only one placeholder allowed in partial application.)" curr))
(recur
(advance parser) members {::ast/type ::ast/placeholder :token curr}))
(advance parser) members {::ast/type ::ast/placeholder :token curr}))
::token/eof
(panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof)
@ -184,24 +189,24 @@
(case (token-type parser)
::token/rparen (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
::token/placeholder
(recur
(advance parser)
members
(panic parser "Placeholders in tuples may only be in function calls." curr))
(advance parser)
members
(panic parser "Placeholders in tuples may only be in function calls." curr))
::token/eof
(panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof)
@ -219,14 +224,14 @@
(case (token-type parser)
::token/rbracket (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/list
:token (current origin)
:members ms}))
{::ast/type ::ast/list
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rparen)
(panic parser (str "Mismatched enclosure in list: " (::token/lexeme curr)))
@ -255,14 +260,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/set
:token (current origin)
:members ms}))
{::ast/type ::ast/set
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in set: " (::token/lexeme curr)))
@ -291,14 +296,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
{::ast/type ::ast/dict
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in dict: " (::token/lexeme curr)))
@ -338,14 +343,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in struct: " (::token/lexeme curr)))
@ -356,12 +361,12 @@
::token/word
(if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])]
(recur parsed members {(keyword word) (::ast parsed)}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
::token/keyword
(if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})]
(recur expr members {(:value (::ast kw)) (::ast expr)}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "Struct entries must be single words or keyword+expression pairs" #{::token/rbrace})))))
@ -381,15 +386,15 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/ns
:token (current ns-root)
:name (get-in (parse-word (advance ns-root)) [::ast :word])
:members ms}))
{::ast/type ::ast/ns
:token (current ns-root)
:name (get-in (parse-word (advance ns-root)) [::ast :word])
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in ns: " (::token/lexeme curr)))
@ -400,12 +405,12 @@
::token/word
(if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])]
(recur parsed members {(keyword word) (::ast parsed)}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
::token/keyword
(if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})]
(recur expr members {(:value (::ast kw)) (::ast expr)}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace}))
(panic parser "ns entries must be single words or keyword+expression pairs" #{::token/rbrace})))))))
@ -425,8 +430,8 @@
(::token/semicolon ::token/newline)
(recur
(accept-many #{::token/newline ::token/semicolon} parser)
(add-member exprs current_expr) nil)
(accept-many #{::token/newline ::token/semicolon} parser)
(add-member exprs current_expr) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in block: " (::token/lexeme curr)))
@ -461,8 +466,8 @@
(defn- parse-word [parser]
(let [curr (current parser)]
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/word :token (current parser) :word (::token/lexeme curr)}))))
(advance)
(assoc ::ast {::ast/type ::ast/word :token (current parser) :word (::token/lexeme curr)}))))
(def sync-pattern (s/union sync-on #{::token/equals ::token/rarrow}))
@ -471,21 +476,21 @@
(case (token-type splatted)
::token/word
(assoc (advance splatted) ::ast
{::ast/type ::ast/splat
:token (current origin)
:into (::ast (parse-word splatted))})
{::ast/type ::ast/splat
:token (current origin)
:into (::ast (parse-word splatted))})
::token/placeholder
(assoc (advance splatted) ::ast
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current splatted)}})
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current splatted)}})
(::token/comma ::token/newline ::token/rbrace ::token/rparen ::token/rbracket)
(assoc splatted ::ast
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current origin)}})
{::ast/type ::ast/splat
:token (current origin)
:into {::ast/type ::ast/placeholder :token (current origin)}})
(panic origin "Splat patterns may only splat into words or placeholders."))))
@ -499,16 +504,16 @@
(let [ms (add-member members current_member)]
(if (not-any? #(= (::ast/type %) ::ast/splat) (drop-last ms))
(assoc (advance parser) ::ast
{::ast/type ::ast/list
:token (current origin)
:length (count ms)
:members ms})
{::ast/type ::ast/list
:token (current origin)
:length (count ms)
:members ms})
(panic parser "A splat my only appear once in a list pattern, at the end of the pattern.")))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rparen)
(panic parser (str "Mismatched enclosure in list pattern: " (::token/lexeme curr)))
@ -535,17 +540,17 @@
(panic parser (str "Dict patterns may not duplicate keys: " current-key))
(let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/dict
:token (current origin)
:members ms}))))
{::ast/type ::ast/dict
:token (current origin)
:members ms}))))
(::token/comma ::token/newline)
(let [current-key (first (keys current_member))]
(if (current-key members)
(panic parser (str "Dict patterns may not duplicate keys: " current-key))
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)))
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)))
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in dict pattern: " (::token/lexeme curr)))
@ -590,14 +595,14 @@
(case (token-type parser)
::token/rbrace (let [ms (add-member members current_member)]
(assoc (advance parser) ::ast
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
{::ast/type ::ast/struct
:token (current origin)
:members ms}))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbracket ::token/rparen)
(panic parser (str "Mismatched enclosure in struct pattern: " (::token/lexeme curr)))
@ -644,16 +649,16 @@
(let [ms (add-member members current_member)]
(if (not-any? #(= (::ast/type %) ::ast/splat) (drop-last ms))
(assoc (advance parser) ::ast
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms})
{::ast/type ::ast/tuple
:token (current origin)
:length (count ms)
:members ms})
(panic parser "A splat my only appear once in a tuple pattern, at the end of the pattern.")))
(::token/comma ::token/newline)
(recur
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(accept-many #{::token/comma ::token/newline} parser)
(add-member members current_member) nil)
(::token/rbrace ::token/rbracket)
(panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr)))
@ -674,8 +679,8 @@
(case type
(::token/placeholder ::token/ignored)
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/placeholder :token curr}))
(advance)
(assoc ::ast {::ast/type ::ast/placeholder :token curr}))
::token/word (parse-word parser)
@ -869,12 +874,12 @@
(defn- parse-cond-clause [parser]
(let [expr (if
(contains? #{::token/else ::token/placeholder} (token-type parser))
(contains? #{::token/else ::token/placeholder} (token-type parser))
(-> parser
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token (current parser)
:value true}))
(advance)
(assoc ::ast {::ast/type ::ast/atom
:token (current parser)
:value true}))
(parse-expr parser))
rarrow (expect* #{::token/rarrow} "Expected arrow after expression in cond clause" expr)]
(if (:success rarrow)
@ -1118,9 +1123,9 @@
(::token/semicolon ::token/newline)
(recur
(accept-many #{::token/semicolon ::token/newline} parser)
(add-member exprs current_expr)
nil)
(accept-many #{::token/semicolon ::token/newline} parser)
(add-member exprs current_expr)
nil)
(let [parsed
(if current_expr
@ -1215,9 +1220,9 @@
(defn parse [lexed]
(-> lexed
(:tokens)
(parser)
(parse-script)))
(:tokens)
(parser)
(parse-script)))
(do
(def source "