Get parser combinator strategy working

This commit is contained in:
Scott Richmond 2023-05-16 16:06:18 -04:00
parent 919ab5ca34
commit cbd78ce7f7

View File

@ -2,16 +2,18 @@
(:require
[ludus.scanner :as scan]))
(def msgs {
})
(defn ? [val default] (if (nil? val) default val))
(defn ok? [{status :status}]
(= status :ok))
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet)))
(def failing #{:err :none})
(def passing #{:ok :group :silent})
(defn pass? [{status :status}] (contains? passing status))
(defn fail? [{status :status}] (conatins? failing status))
(defn data [{d :data}] d)
@ -19,33 +21,39 @@
(defn pname [parser] (? (:name parser) parser))
(defn str-part [kw] (apply str (next (str kw))))
(defn kw+str [kw mystr] (keyword (str (str-part kw) mystr)))
(defn value [token]
(if (= :none (:literal token)) (:lexeme token) (:literal token)))
(defn apply-kw-parser [kw tokens]
(let [token (first tokens)]
(println "applying kw parser " kw " to " token)
;(println "applying kw parser " kw " to " token)
(if (= kw (:type token))
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)}
{:status :err :token token :trace [kw] :remaining (rest tokens)})))
{:status :ok
:type kw
:data (if (value token) [(value token)] [])
:token token
:remaining (rest tokens)}
{:status :none :token token :trace [kw] :remaining (rest tokens)})))
(defn apply-fn-parser [parser tokens]
(println "applying fn parser" parser ", " tokens)
(let [rule (:rule parser) name (:name parser)]
(println "appying fn parser " name " to " (first tokens))
;(println "appying fn parser " name " to " (first tokens))
(rule tokens)))
(defn apply-parser [parser tokens]
(if (keyword? parser)
(apply-kw-parser parser tokens)
(apply-fn-parser parser tokens)))
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
(cond
(keyword? parser) (apply-kw-parser parser tokens)
(:rule parser) (apply-fn-parser parser tokens)
:else (throw (Exception. "`apply-parser` requires a parser"))))
(defn choice [name parsers]
{:name name
:rule (fn [tokens]
(println "entering CHOICE" name)
:rule (fn choice-fn [tokens]
;(println "entering CHOICE" name)
(loop [ps parsers]
(let [result (apply-parser (first ps) tokens)
rem-ts (remaining result)
@ -54,57 +62,71 @@
(pass? result)
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
(= :err (:status result))
(update result :trace #(conj % name))
(empty? rem-ps)
{:status :err :token (first tokens) :trace [name] :remaining rem-ts}
{:status :none :token (first tokens) :trace [name] :remaining rem-ts}
:else (recur rem-ps)))))})
(defn order [name parsers]
{:name name
:rule (fn [tokens]
(println "entering ORDER" name)
(let [origin (first tokens)]
(loop [ps parsers
results []
ts tokens]
:rule (fn order-fn [tokens]
;(println "entering ORDER" name)
(let [origin (first tokens)
first-result (apply-parser (first parsers) tokens)]
(case (:status first-result)
(:err :none)
{:status :none
:token (first tokens)
:trace [name]
:remaining tokens}
(:ok :quiet :group)
(loop [ps (rest parsers)
results (case (:status first-result)
:ok [first-result]
:quiet []
:group (:data first-result))
ts (remaining first-result)]
(let [result (apply-parser (first ps) ts)
res-rem (remaining result)]
(if (empty? (rest ps))
(case (:status result)
:ok {:status :ok
:ok {:status :group
:type name
:data (conj results result)
:token origin
:remaining res-rem}
:quiet {:status :ok
:quiet {:status :group
:type name
:data results
:token origin
:remaining res-rem}
:group {:status :ok
:group {:status :group
:type name
:data (vec (concat results (:data result)))
:token origin
:remaining res-rem}
:err (update result :trace #(conj % name)))
(:err :none)
(assoc (update result :trace #(conj % name)) :status :err))
(case (:status result)
:ok (recur (rest ps) (conj results result) res-rem)
:group (recur (rest ps)
;; TODO: fix this?
;; This is supposed to undo the :quiet/:group thing
(vec (concat results
(filter #(= (:status %) :ok) (:data result))))
(vec (concat results (:data result)))
res-rem)
:quiet (recur (rest ps) results res-rem)
:err (update result :trace #(conj % name))))))))})
(:err :none)
(assoc (update result :trace #(conj % name)) :status :err))))))))})
(defn quiet [parser]
{:name (? (:name parser) parser)
:rule (fn [tokens]
{:name (kw+str (? (:name parser) parser) "-quiet")
:rule (fn quiet-fn [tokens]
(let [result (apply-parser parser tokens)]
(if (pass? result)
(assoc result :status :quiet)
@ -113,45 +135,50 @@
(defn zero+
([parser] (zero+ (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
(println "entering ZERO+")
{:name (kw+str name "-zero+")
:rule (fn zero+fn [tokens]
;(println "entering ZERO+")
(loop [results []
ts tokens
back tokens]
(println "looping ZERO+" (:name parser))
ts tokens]
;(println "looping ZERO+" (? (:name parser) parser))
(let [result (apply-parser parser ts)]
(if (pass? result)
(recur (conj results result) (remaining result) ts)
{:status :group :type name :data results :token (first tokens) :remaining ts}
))))}))
(case (:status result)
:ok (recur (conj results result) (remaining result))
:group (recur (vec (concat results (:data result))) (remaining result))
:quiet (recur results (remaining result))
{:status :group :type name :data results :token (first tokens) :remaining ts}))))}))
(defn one+
([parser] (one+ (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
(let [result (apply-parser parser tokens)
rest (zero+ name parser)]
(case (:status result)
(:ok :quiet)
(let [rest-result (apply-parser rest (remaining result))
rest-data (data rest-result)
rest-remaining (remaining rest-result)]
(println rest-data)
{:name (kw+str name "-one+")
:rule (fn one+fn [tokens]
(let [first-result (apply-parser parser tokens)
rest-parser (zero+ name parser)]
(case (:status first-result)
(:ok :group)
(let [rest-result (apply-parser rest-parser (remaining first-result))]
{:status :group
:type name
:data (vec (concat (data result) (second rest-data)) )
:data (vec (concat [first-result] (data rest-result)))
:token (first tokens)
:remaining rest-remaining})
:remaining (remaining rest-result)})
:err result)))}))
:quiet
(let [rest-result (apply-parser rest-parser (remaining first-result))]
{:status :quiet
:type name
:data []
:token (first tokens)
:remaining (remaining rest-result)})
(:err :none) first-result)))}))
(defn maybe
([parser] (maybe (pname parser) parser))
([name parser]
{:name name
:rule (fn [tokens]
{:name (kw+str name "-maybe")
:rule (fn maybe-fn [tokens]
(let [result (apply-parser parser tokens)]
(if (pass? result)
result
@ -159,43 +186,56 @@
)))}))
(comment
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure.
"
If I'm not mistaken, the Ludus grammer requires *no* lookahead, the first token in an expression tells you what kind of expression it is:
This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing.
Rather, there is one ambiguity: synthetic expressions can start with words or keywords.
A bare word can be assimilated to synthetic expressions. Interestingly, so can synthetic.
***
The parsing strategy is the same: consume as many things until you can't get anymore.
And now the `group` status has broken `quiet`
The fact that a bare keyword is evaluated like a literal doesn't matter.
TODO: the concats put things into lists/seqs, and thus lett and iff are out of order.
So:
literal -> literal
keyword -> synthetic
word -> synthetic
( -> tuple
[ -> list
#{ -> dict
@{ -> struct
ns -> ns
let -> let
do -> pipeline
etc.
Because there's now NO lookahead, we can easily distinguish between orderings that don't match at all, and ones which match on the first token.
Because of that, we can also distinguish between no-match and errors
")
(defn group
([parser] (pname parser) parser)
([name parser] (fn [tokens]
(let [result (apply-parser parser tokens)
data (map :data (:data result))]
{assoc result :data data}))))
(declare expression)
(def literal (choice :literal [:nil :true :false :number :string :keyword]))
(def literal (choice :literal [:nil :true :false :number :string]))
(def separator (one+ (choice :separator [:comma :newline])))
(def separator (choice :separator [:comma :newline]))
(def nls? (quiet (zero+ :nls :newline)))
(def pattern (choice :pattern [:literal :word])) ;; stupid to start
(def iff (order :iff [
(quiet :if) nls?
(quiet :if)
nls?
expression
nls? (quiet :then)
nls?
(quiet :then)
expression
nls? (quiet :else)
nls?
(quiet :else)
expression]))
(def lett (order :let [
@ -205,19 +245,35 @@
nls?
expression]))
(def tuple-entries (order :tuple-entries [(quiet separator) expression]))
(def tuple-entry (order :tuple-entry [(quiet (one+ separator)) expression]))
(def tuple (order :tuple
[(quiet :lparen)
(quiet (zero+ separator))
(maybe expression)
(zero+ tuple-entries)
(zero+ tuple-entry)
(quiet (zero+ separator))
(quiet :rparen)]))
(def splat (order :splat [(quiet :splat) :word]))
(def list-term (choice :list-term [splat expression]))
(def list-entry (order :list-entry [(quiet (one+ separator)) list-term]))
(def listt (order :list
[(quiet :lbracket)
(quiet (zero+ separator))
(maybe list-term)
(zero+ list-entry)
(quiet (zero+ separator))
(quiet :rbracket)]))
(def synth-root (choice :synth-root [:keyword :word]))
(def synth-term (choice :synth-term [:tuple :keyword]))
(def synth-term (choice :synth-term [tuple :keyword]))
(def synthetic (order :synthetic [synth-root (one+ synth-term)]))
(def synthetic (order :synthetic [synth-root (zero+ synth-term)]))
(def terminator (choice :terminator [:newline :semicolon]))
@ -225,16 +281,31 @@
(def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)]))
(def expression (choice :expression [tuple literal lett iff synthetic :word block]))
(def expression (choice :expression [lett iff synthetic block listt tuple literal]))
(def foo (order :foo [:number :keyword]))
(def importt (order :import [(quiet :import) :string (quiet :as) :word]))
(def eg (:tokens (scan/scan "let foo = :bar")))
(def toplevel (choice :toplevel [importt expression]))
(def result (apply-parser expression eg))
(def script-line (order :script-line [(quiet terminator) toplevel]))
(def script (order :script [nls? toplevel (zero+ script-line) nls? (quiet :eof)]))
(def eg (:tokens (scan/scan
""
)))
eg
(println eg)
(def result (apply-parser script eg))
result
(println result)
(defn clean [node]
(if (map? node)
(-> node
@ -242,7 +313,7 @@ result
:status
:remaining
:token)
(update :data #(map clean %)))
(update :data #(into [] (map clean) %)))
node))
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)