Many iterations of parser combinator strategies. Not yet working.

This commit is contained in:
Scott Richmond 2023-05-07 22:49:19 -04:00
parent 2866ff4eb6
commit 5b1ff5aef3
3 changed files with 272 additions and 22 deletions

View File

@ -1,32 +1,227 @@
(ns ludus.parser-new (ns ludus.parser-new
(:require (:require
[ludus.scanner :as scan])) [ludus.scanner :as scan]))
(defn ok? [[ok]] (def msgs {
(= ok :ok))
(defn kw->type [kw] (apply str (next (str kw)))) })
(defn match [kw token] (defn ? [val default] (if (nil? val) default val))
(if (= kw (:type token))
[:ok token]
[:error token (str "Expected " (kw->type kw))]))
(defn parser (defn ok? [{status :status}]
([kw] {:type kw :fn #(match kw %)}) (= status :ok))
([kw err] {:type kw :fn #(assoc (match kw %) 2 err)}))
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet)))
(defn choice [& args]) (defn data [{d :data}] d)
(def eg (:tokens (scan/scan "123 :foo"))) (defn remaining [{r :remaining}] r)
(def word (parser :word "fuck")) (defn pname [parser] (? (:name parser) parser))
(word (first eg)) (defn value [token]
(if (= :none (:literal token)) (:lexeme token) (:literal token)))
(defn apply-kw-parser [kw tokens]
(let [token (first tokens)]
(println "applying kw parser " kw " to " token)
(if (= kw (:type token))
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)}
{:status :err :token token :trace [kw] :remaining (rest tokens)})))
(defn apply-fn-parser [parser tokens]
(println "applying fn parser" parser ", " tokens)
(let [rule (:rule parser) name (:name parser)]
(println "appying fn parser " name " to " (first tokens))
(rule tokens)))
(defn apply-parser [parser tokens]
(if (keyword? parser)
(apply-kw-parser parser tokens)
(apply-fn-parser parser tokens)))
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
(defn choice [name parsers]
{:name name
:rule (fn [tokens]
(println "entering CHOICE" name)
(loop [ps parsers]
(let [result (apply-parser (first ps) tokens)
rem-ts (remaining result)
rem-ps (rest ps)]
(cond
(pass? result)
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
(empty? rem-ps)
{:status :err :token (first tokens) :trace [name] :remaining rem-ts}
:else (recur rem-ps)))))})
(defn order [name parsers]
{:name name
:rule (fn [tokens]
(println "entering ORDER" name)
(let [origin (first tokens)]
(loop [ps parsers
results []
ts tokens]
(let [result (apply-parser (first ps) ts)
res-rem (remaining result)]
(if (empty? (rest ps))
(case (:status result)
:ok {:status :ok
:type name
:data (conj results result)
:token origin
:remaining res-rem}
:quiet {:status :ok
:type name
:data results
:token origin
:remaining res-rem}
:group {:status :ok
:type name
:data (concat results (:data result))
:token origin
:remaining res-rem}
:err (update result :trace #(conj % name)))
(case (:status result)
:ok (recur (rest ps) (conj results result) res-rem)
:group (recur (rest ps)
;; TODO: fix this?
;; This is supposed to undo the :quiet/:group thing
(concat results
(filter #(= (:status %) :ok) (:data result)))
res-rem)
:quiet (recur (rest ps) results res-rem)
:err (update result :trace #(conj % name))))))))})
(defn quiet [parser]
{:name (? (:name parser) parser)
:rule (fn [tokens]
(let [result (apply-parser parser tokens)]
(if (pass? result)
(assoc result :status :quiet)
result)))})
(defn one+
([parser] (one+ (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
(let [result (apply-parser parser tokens)
rest (zero+ name parser)]
(case (:status result)
(:ok :quiet)
(let [rest-result (apply-parser rest (remaining result))
rest-data (data rest-result)
rest-remaining (remaining rest-result)]
(println rest-data)
{:status :group
:type name
:data (concat (data result) (second rest-data))
:token (first tokens)
:remaining rest-remaining})
:err result)))}))
(defn zero+
([parser] (zero+ (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
(println "entering ZERO+")
(loop [results []
ts tokens
back tokens]
(println "looping ZERO+" (:name parser))
(let [result (apply-parser parser ts)]
(if (pass? result)
(recur (conj results result) (remaining result) ts)
{:status :group :type name :data results :token (first tokens) :remaining ts}
))))}))
(defn maybe
([parser] (maybe (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
(let [result (apply-parser parser tokens)]
(if (pass? result)
result
{:status :group :type name :data [] :token (first tokens) :remaining tokens}
)))}))
(comment (comment
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure.
(def string (parser :string)) This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing.
) ***
And now the `group` status has broken `quiet`
")
(defn group
([parser] (pname parser) parser)
([name parser] (fn [tokens]
(let [result (apply-parser parser tokens)
data (map :data (:data result))]
{assoc result :data data}))))
(declare expression)
(def literal (choice :literal [:nil :true :false :number :string :keyword]))
(def separator (one+ (choice :separator [:comma :newline])))
(def nls? (quiet (zero+ :nls :newline)))
(def tuple-entries (order :tuple-entries [(quiet separator) expression]))
(def tuple (order :tuple
[(quiet :lparen)
(maybe expression)
(zero+ tuple-entries)
(quiet :rparen)]))
(def expression (choice :expression [tuple literal]))
(def foo (order :foo [:number :keyword]))
(def eg (:tokens (scan/scan "(1, 2, 3)")))
(def result (apply-parser tuple eg))
result
(defn clean [node]
(if (map? node)
(-> node
(dissoc
:status
:remaining
:token)
(update :data #(map clean %)))
node))
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)
(def my-data (-> result clean tap))
my-data
(def my-first (-> my-data first))
(def my-sec (map :data (-> my-data second :data)))
(concat my-first my-sec)

View File

@ -11,20 +11,20 @@
"cond" :cond ;; impl "cond" :cond ;; impl
"do" :do ;; impl "do" :do ;; impl
"else" :else ;; impl "else" :else ;; impl
"false" :false ;; impl "false" :false ;; impl -> literal word
"fn" :fn ;; impl "fn" :fn ;; impl
"if" :if ;; impl "if" :if ;; impl
"import" :import ;; impl "import" :import ;; impl
"let" :let ;; impl "let" :let ;; impl
"loop" :loop ;; impl "loop" :loop ;; impl
"match" :match ;; impl "match" :match ;; impl
"nil" :nil ;; impl "nil" :nil ;; impl -> literal word
"ns" :ns ;; impl "ns" :ns ;; impl
;; "panic!" :panic ;; impl (should be a function) ;; "panic!" :panic ;; impl (should be a function)
"recur" :recur ;; impl "recur" :recur ;; impl
"ref" :ref ;; impl "ref" :ref ;; impl
"then" :then ;; impl "then" :then ;; impl
"true" :true ;; impl "true" :true ;; impl -> literal word
"with" :with ;; impl "with" :with ;; impl
;; actor model/concurrency ;; actor model/concurrency
@ -42,6 +42,12 @@
;; "module" :module ;; not necessary if we don't have datatypes ;; "module" :module ;; not necessary if we don't have datatypes
}) })
(def literal-words {
"true" true
"false" false
"nil" nil
})
(defn- new-scanner (defn- new-scanner
"Creates a new scanner." "Creates a new scanner."
[source] [source]
@ -195,7 +201,9 @@
word (str char)] word (str char)]
(let [curr (current-char scanner)] (let [curr (current-char scanner)]
(cond (cond
(terminates? curr) (add-token scanner (get reserved-words word :word)) (terminates? curr) (add-token scanner
(get reserved-words word :word)
(get literal-words word :none))
(word-char? curr) (recur (advance scanner) (str word curr)) (word-char? curr) (recur (advance scanner) (str word curr))
:else (add-error scanner (str "Unexpected " curr " after word " word ".")))))) :else (add-error scanner (str "Unexpected " curr " after word " word "."))))))

47
tokens Normal file
View File

@ -0,0 +1,47 @@
TOKENS:
:nil
:true
:false
:word
:keyword
:number
:string
:as
:cond
:do
:else
:fn
:if
:import
:let
:loop
:ref
:then
:with
:receive
:spawn
:repeat
:test
:when
:lparen
:rparen
:lbrace
:rbrace
:lbracket
:rbracket
:semicolon
:comma
:newline
:backslash
:equals
:pipeline
:rarrow
:startdict
:startstruct
:startset
:splat
:eof