Get parser combinator strategy working

This commit is contained in:
Scott Richmond 2023-05-16 16:06:18 -04:00
parent 919ab5ca34
commit cbd78ce7f7

View File

@ -2,16 +2,18 @@
(:require (:require
[ludus.scanner :as scan])) [ludus.scanner :as scan]))
(def msgs {
})
(defn ? [val default] (if (nil? val) default val)) (defn ? [val default] (if (nil? val) default val))
(defn ok? [{status :status}] (defn ok? [{status :status}]
(= status :ok)) (= status :ok))
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet))) (def failing #{:err :none})
(def passing #{:ok :group :silent})
(defn pass? [{status :status}] (contains? passing status))
(defn fail? [{status :status}] (conatins? failing status))
(defn data [{d :data}] d) (defn data [{d :data}] d)
@ -19,33 +21,39 @@
(defn pname [parser] (? (:name parser) parser)) (defn pname [parser] (? (:name parser) parser))
(defn str-part [kw] (apply str (next (str kw))))
(defn kw+str [kw mystr] (keyword (str (str-part kw) mystr)))
(defn value [token] (defn value [token]
(if (= :none (:literal token)) (:lexeme token) (:literal token))) (if (= :none (:literal token)) (:lexeme token) (:literal token)))
(defn apply-kw-parser [kw tokens] (defn apply-kw-parser [kw tokens]
(let [token (first tokens)] (let [token (first tokens)]
(println "applying kw parser " kw " to " token) ;(println "applying kw parser " kw " to " token)
(if (= kw (:type token)) (if (= kw (:type token))
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)} {:status :ok
{:status :err :token token :trace [kw] :remaining (rest tokens)}))) :type kw
:data (if (value token) [(value token)] [])
:token token
:remaining (rest tokens)}
{:status :none :token token :trace [kw] :remaining (rest tokens)})))
(defn apply-fn-parser [parser tokens] (defn apply-fn-parser [parser tokens]
(println "applying fn parser" parser ", " tokens)
(let [rule (:rule parser) name (:name parser)] (let [rule (:rule parser) name (:name parser)]
(println "appying fn parser " name " to " (first tokens)) ;(println "appying fn parser " name " to " (first tokens))
(rule tokens))) (rule tokens)))
(defn apply-parser [parser tokens] (defn apply-parser [parser tokens]
(if (keyword? parser) (cond
(apply-kw-parser parser tokens) (keyword? parser) (apply-kw-parser parser tokens)
(apply-fn-parser parser tokens))) (:rule parser) (apply-fn-parser parser tokens)
:else (throw (Exception. "`apply-parser` requires a parser"))))
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
(defn choice [name parsers] (defn choice [name parsers]
{:name name {:name name
:rule (fn [tokens] :rule (fn choice-fn [tokens]
(println "entering CHOICE" name) ;(println "entering CHOICE" name)
(loop [ps parsers] (loop [ps parsers]
(let [result (apply-parser (first ps) tokens) (let [result (apply-parser (first ps) tokens)
rem-ts (remaining result) rem-ts (remaining result)
@ -54,170 +62,218 @@
(pass? result) (pass? result)
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts} {:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
(= :err (:status result))
(update result :trace #(conj % name))
(empty? rem-ps) (empty? rem-ps)
{:status :err :token (first tokens) :trace [name] :remaining rem-ts} {:status :none :token (first tokens) :trace [name] :remaining rem-ts}
:else (recur rem-ps)))))}) :else (recur rem-ps)))))})
(defn order [name parsers] (defn order [name parsers]
{:name name {:name name
:rule (fn [tokens] :rule (fn order-fn [tokens]
(println "entering ORDER" name) ;(println "entering ORDER" name)
(let [origin (first tokens)] (let [origin (first tokens)
(loop [ps parsers first-result (apply-parser (first parsers) tokens)]
results [] (case (:status first-result)
ts tokens] (:err :none)
(let [result (apply-parser (first ps) ts) {:status :none
res-rem (remaining result)] :token (first tokens)
(if (empty? (rest ps)) :trace [name]
(case (:status result) :remaining tokens}
:ok {:status :ok (:ok :quiet :group)
:type name (loop [ps (rest parsers)
:data (conj results result) results (case (:status first-result)
:token origin :ok [first-result]
:remaining res-rem} :quiet []
:group (:data first-result))
ts (remaining first-result)]
(let [result (apply-parser (first ps) ts)
res-rem (remaining result)]
(if (empty? (rest ps))
(case (:status result)
:ok {:status :group
:type name
:data (conj results result)
:token origin
:remaining res-rem}
:quiet {:status :ok :quiet {:status :group
:type name :type name
:data results :data results
:token origin :token origin
:remaining res-rem} :remaining res-rem}
:group {:status :ok :group {:status :group
:type name :type name
:data (vec (concat results (:data result))) :data (vec (concat results (:data result)))
:token origin :token origin
:remaining res-rem} :remaining res-rem}
:err (update result :trace #(conj % name))) (:err :none)
(assoc (update result :trace #(conj % name)) :status :err))
(case (:status result) (case (:status result)
:ok (recur (rest ps) (conj results result) res-rem) :ok (recur (rest ps) (conj results result) res-rem)
:group (recur (rest ps) :group (recur (rest ps)
;; TODO: fix this? (vec (concat results (:data result)))
;; This is supposed to undo the :quiet/:group thing res-rem)
(vec (concat results :quiet (recur (rest ps) results res-rem)
(filter #(= (:status %) :ok) (:data result)))) (:err :none)
res-rem) (assoc (update result :trace #(conj % name)) :status :err))))))))})
:quiet (recur (rest ps) results res-rem)
:err (update result :trace #(conj % name))))))))})
(defn quiet [parser] (defn quiet [parser]
{:name (? (:name parser) parser) {:name (kw+str (? (:name parser) parser) "-quiet")
:rule (fn [tokens] :rule (fn quiet-fn [tokens]
(let [result (apply-parser parser tokens)] (let [result (apply-parser parser tokens)]
(if (pass? result) (if (pass? result)
(assoc result :status :quiet) (assoc result :status :quiet)
result)))}) result)))})
(defn zero+ (defn zero+
([parser] (zero+ (pname parser) parser)) ([parser] (zero+ (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-zero+")
:rule (fn [tokens] :rule (fn zero+fn [tokens]
(println "entering ZERO+") ;(println "entering ZERO+")
(loop [results [] (loop [results []
ts tokens ts tokens]
back tokens] ;(println "looping ZERO+" (? (:name parser) parser))
(println "looping ZERO+" (:name parser)) (let [result (apply-parser parser ts)]
(let [result (apply-parser parser ts)] (case (:status result)
(if (pass? result) :ok (recur (conj results result) (remaining result))
(recur (conj results result) (remaining result) ts) :group (recur (vec (concat results (:data result))) (remaining result))
{:status :group :type name :data results :token (first tokens) :remaining ts} :quiet (recur results (remaining result))
))))})) {:status :group :type name :data results :token (first tokens) :remaining ts}))))}))
(defn one+ (defn one+
([parser] (one+ (pname parser) parser)) ([parser] (one+ (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-one+")
:rule (fn [tokens] :rule (fn one+fn [tokens]
(let [result (apply-parser parser tokens) (let [first-result (apply-parser parser tokens)
rest (zero+ name parser)] rest-parser (zero+ name parser)]
(case (:status result) (case (:status first-result)
(:ok :quiet) (:ok :group)
(let [rest-result (apply-parser rest (remaining result)) (let [rest-result (apply-parser rest-parser (remaining first-result))]
rest-data (data rest-result) {:status :group
rest-remaining (remaining rest-result)] :type name
(println rest-data) :data (vec (concat [first-result] (data rest-result)))
{:status :group :token (first tokens)
:type name :remaining (remaining rest-result)})
:data (vec (concat (data result) (second rest-data)) )
:token (first tokens)
:remaining rest-remaining})
:err result)))})) :quiet
(let [rest-result (apply-parser rest-parser (remaining first-result))]
{:status :quiet
:type name
:data []
:token (first tokens)
:remaining (remaining rest-result)})
(:err :none) first-result)))}))
(defn maybe (defn maybe
([parser] (maybe (pname parser) parser)) ([parser] (maybe (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-maybe")
:rule (fn [tokens] :rule (fn maybe-fn [tokens]
(let [result (apply-parser parser tokens)] (let [result (apply-parser parser tokens)]
(if (pass? result) (if (pass? result)
result result
{:status :group :type name :data [] :token (first tokens) :remaining tokens} {:status :group :type name :data [] :token (first tokens) :remaining tokens}
)))})) )))}))
(comment (comment
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure. "
If I'm not mistaken, the Ludus grammer requires *no* lookahead, the first token in an expression tells you what kind of expression it is:
This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing. Rather, there is one ambiguity: synthetic expressions can start with words or keywords.
A bare word can be assimilated to synthetic expressions. Interestingly, so can synthetic.
*** The parsing strategy is the same: consume as many things until you can't get anymore.
And now the `group` status has broken `quiet` The fact that a bare keyword is evaluated like a literal doesn't matter.
TODO: the concats put things into lists/seqs, and thus lett and iff are out of order. So:
literal -> literal
keyword -> synthetic
word -> synthetic
( -> tuple
[ -> list
#{ -> dict
@{ -> struct
ns -> ns
let -> let
do -> pipeline
etc.
") Because there's now NO lookahead, we can easily distinguish between orderings that don't match at all, and ones which match on the first token.
(defn group Because of that, we can also distinguish between no-match and errors
([parser] (pname parser) parser)
([name parser] (fn [tokens] ")
(let [result (apply-parser parser tokens)
data (map :data (:data result))]
{assoc result :data data}))))
(declare expression) (declare expression)
(def literal (choice :literal [:nil :true :false :number :string :keyword])) (def literal (choice :literal [:nil :true :false :number :string]))
(def separator (one+ (choice :separator [:comma :newline]))) (def separator (choice :separator [:comma :newline]))
(def nls? (quiet (zero+ :nls :newline))) (def nls? (quiet (zero+ :nls :newline)))
(def pattern (choice :pattern [:literal :word])) ;; stupid to start (def pattern (choice :pattern [:literal :word])) ;; stupid to start
(def iff (order :iff [ (def iff (order :iff [
(quiet :if) nls? (quiet :if)
expression nls?
nls? (quiet :then) expression
expression nls?
nls? (quiet :else) (quiet :then)
expression])) expression
nls?
(quiet :else)
expression]))
(def lett (order :let [ (def lett (order :let [
(quiet :let) (quiet :let)
pattern pattern
(quiet :equals) (quiet :equals)
nls? nls?
expression])) expression]))
(def tuple-entries (order :tuple-entries [(quiet separator) expression])) (def tuple-entry (order :tuple-entry [(quiet (one+ separator)) expression]))
(def tuple (order :tuple (def tuple (order :tuple
[(quiet :lparen) [(quiet :lparen)
(maybe expression) (quiet (zero+ separator))
(zero+ tuple-entries) (maybe expression)
(quiet :rparen)])) (zero+ tuple-entry)
(quiet (zero+ separator))
(quiet :rparen)]))
(def splat (order :splat [(quiet :splat) :word]))
(def list-term (choice :list-term [splat expression]))
(def list-entry (order :list-entry [(quiet (one+ separator)) list-term]))
(def listt (order :list
[(quiet :lbracket)
(quiet (zero+ separator))
(maybe list-term)
(zero+ list-entry)
(quiet (zero+ separator))
(quiet :rbracket)]))
(def synth-root (choice :synth-root [:keyword :word])) (def synth-root (choice :synth-root [:keyword :word]))
(def synth-term (choice :synth-term [:tuple :keyword])) (def synth-term (choice :synth-term [tuple :keyword]))
(def synthetic (order :synthetic [synth-root (one+ synth-term)])) (def synthetic (order :synthetic [synth-root (zero+ synth-term)]))
(def terminator (choice :terminator [:newline :semicolon])) (def terminator (choice :terminator [:newline :semicolon]))
@ -225,25 +281,40 @@
(def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)])) (def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)]))
(def expression (choice :expression [tuple literal lett iff synthetic :word block])) (def expression (choice :expression [lett iff synthetic block listt tuple literal]))
(def foo (order :foo [:number :keyword])) (def importt (order :import [(quiet :import) :string (quiet :as) :word]))
(def eg (:tokens (scan/scan "let foo = :bar"))) (def toplevel (choice :toplevel [importt expression]))
(def result (apply-parser expression eg)) (def script-line (order :script-line [(quiet terminator) toplevel]))
(def script (order :script [nls? toplevel (zero+ script-line) nls? (quiet :eof)]))
(def eg (:tokens (scan/scan
""
)))
eg
(println eg)
(def result (apply-parser script eg))
result result
(println result)
(defn clean [node] (defn clean [node]
(if (map? node) (if (map? node)
(-> node (-> node
(dissoc (dissoc
:status :status
:remaining :remaining
:token) :token)
(update :data #(map clean %))) (update :data #(into [] (map clean) %)))
node)) node))
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x) (defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)