Many iterations of parser combinator strategies. Not yet working.
This commit is contained in:
parent
2866ff4eb6
commit
5b1ff5aef3
|
@ -2,31 +2,226 @@
|
||||||
(:require
|
(:require
|
||||||
[ludus.scanner :as scan]))
|
[ludus.scanner :as scan]))
|
||||||
|
|
||||||
(defn ok? [[ok]]
|
(def msgs {
|
||||||
(= ok :ok))
|
|
||||||
|
|
||||||
(defn kw->type [kw] (apply str (next (str kw))))
|
})
|
||||||
|
|
||||||
(defn match [kw token]
|
(defn ? [val default] (if (nil? val) default val))
|
||||||
|
|
||||||
|
(defn ok? [{status :status}]
|
||||||
|
(= status :ok))
|
||||||
|
|
||||||
|
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet)))
|
||||||
|
|
||||||
|
(defn data [{d :data}] d)
|
||||||
|
|
||||||
|
(defn remaining [{r :remaining}] r)
|
||||||
|
|
||||||
|
(defn pname [parser] (? (:name parser) parser))
|
||||||
|
|
||||||
|
(defn value [token]
|
||||||
|
(if (= :none (:literal token)) (:lexeme token) (:literal token)))
|
||||||
|
|
||||||
|
(defn apply-kw-parser [kw tokens]
|
||||||
|
(let [token (first tokens)]
|
||||||
|
(println "applying kw parser " kw " to " token)
|
||||||
(if (= kw (:type token))
|
(if (= kw (:type token))
|
||||||
[:ok token]
|
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)}
|
||||||
[:error token (str "Expected " (kw->type kw))]))
|
{:status :err :token token :trace [kw] :remaining (rest tokens)})))
|
||||||
|
|
||||||
(defn parser
|
(defn apply-fn-parser [parser tokens]
|
||||||
([kw] {:type kw :fn #(match kw %)})
|
(println "applying fn parser" parser ", " tokens)
|
||||||
([kw err] {:type kw :fn #(assoc (match kw %) 2 err)}))
|
(let [rule (:rule parser) name (:name parser)]
|
||||||
|
(println "appying fn parser " name " to " (first tokens))
|
||||||
|
(rule tokens)))
|
||||||
|
|
||||||
|
(defn apply-parser [parser tokens]
|
||||||
|
(if (keyword? parser)
|
||||||
|
(apply-kw-parser parser tokens)
|
||||||
|
(apply-fn-parser parser tokens)))
|
||||||
|
|
||||||
(defn choice [& args])
|
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
|
||||||
|
|
||||||
(def eg (:tokens (scan/scan "123 :foo")))
|
(defn choice [name parsers]
|
||||||
|
{:name name
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(println "entering CHOICE" name)
|
||||||
|
(loop [ps parsers]
|
||||||
|
(let [result (apply-parser (first ps) tokens)
|
||||||
|
rem-ts (remaining result)
|
||||||
|
rem-ps (rest ps)]
|
||||||
|
(cond
|
||||||
|
(pass? result)
|
||||||
|
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
|
||||||
|
|
||||||
(def word (parser :word "fuck"))
|
(empty? rem-ps)
|
||||||
|
{:status :err :token (first tokens) :trace [name] :remaining rem-ts}
|
||||||
|
:else (recur rem-ps)))))})
|
||||||
|
|
||||||
(word (first eg))
|
(defn order [name parsers]
|
||||||
|
{:name name
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(println "entering ORDER" name)
|
||||||
|
(let [origin (first tokens)]
|
||||||
|
(loop [ps parsers
|
||||||
|
results []
|
||||||
|
ts tokens]
|
||||||
|
(let [result (apply-parser (first ps) ts)
|
||||||
|
res-rem (remaining result)]
|
||||||
|
(if (empty? (rest ps))
|
||||||
|
(case (:status result)
|
||||||
|
|
||||||
|
:ok {:status :ok
|
||||||
|
:type name
|
||||||
|
:data (conj results result)
|
||||||
|
:token origin
|
||||||
|
:remaining res-rem}
|
||||||
|
|
||||||
|
:quiet {:status :ok
|
||||||
|
:type name
|
||||||
|
:data results
|
||||||
|
:token origin
|
||||||
|
:remaining res-rem}
|
||||||
|
|
||||||
|
:group {:status :ok
|
||||||
|
:type name
|
||||||
|
:data (concat results (:data result))
|
||||||
|
:token origin
|
||||||
|
:remaining res-rem}
|
||||||
|
|
||||||
|
:err (update result :trace #(conj % name)))
|
||||||
|
|
||||||
|
(case (:status result)
|
||||||
|
:ok (recur (rest ps) (conj results result) res-rem)
|
||||||
|
:group (recur (rest ps)
|
||||||
|
;; TODO: fix this?
|
||||||
|
;; This is supposed to undo the :quiet/:group thing
|
||||||
|
(concat results
|
||||||
|
(filter #(= (:status %) :ok) (:data result)))
|
||||||
|
res-rem)
|
||||||
|
:quiet (recur (rest ps) results res-rem)
|
||||||
|
:err (update result :trace #(conj % name))))))))})
|
||||||
|
|
||||||
|
(defn quiet [parser]
|
||||||
|
{:name (? (:name parser) parser)
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(let [result (apply-parser parser tokens)]
|
||||||
|
(if (pass? result)
|
||||||
|
(assoc result :status :quiet)
|
||||||
|
result)))})
|
||||||
|
|
||||||
|
(defn one+
|
||||||
|
([parser] (one+ (pname parser) parser))
|
||||||
|
([name parser]
|
||||||
|
{:name name
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(let [result (apply-parser parser tokens)
|
||||||
|
rest (zero+ name parser)]
|
||||||
|
(case (:status result)
|
||||||
|
(:ok :quiet)
|
||||||
|
(let [rest-result (apply-parser rest (remaining result))
|
||||||
|
rest-data (data rest-result)
|
||||||
|
rest-remaining (remaining rest-result)]
|
||||||
|
(println rest-data)
|
||||||
|
{:status :group
|
||||||
|
:type name
|
||||||
|
:data (concat (data result) (second rest-data))
|
||||||
|
:token (first tokens)
|
||||||
|
:remaining rest-remaining})
|
||||||
|
|
||||||
|
:err result)))}))
|
||||||
|
|
||||||
|
(defn zero+
|
||||||
|
([parser] (zero+ (pname parser) parser))
|
||||||
|
([name parser]
|
||||||
|
{:name name
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(println "entering ZERO+")
|
||||||
|
(loop [results []
|
||||||
|
ts tokens
|
||||||
|
back tokens]
|
||||||
|
(println "looping ZERO+" (:name parser))
|
||||||
|
(let [result (apply-parser parser ts)]
|
||||||
|
(if (pass? result)
|
||||||
|
(recur (conj results result) (remaining result) ts)
|
||||||
|
{:status :group :type name :data results :token (first tokens) :remaining ts}
|
||||||
|
))))}))
|
||||||
|
|
||||||
|
(defn maybe
|
||||||
|
([parser] (maybe (pname parser) parser))
|
||||||
|
([name parser]
|
||||||
|
{:name name
|
||||||
|
:rule (fn [tokens]
|
||||||
|
(let [result (apply-parser parser tokens)]
|
||||||
|
(if (pass? result)
|
||||||
|
result
|
||||||
|
{:status :group :type name :data [] :token (first tokens) :remaining tokens}
|
||||||
|
)))}))
|
||||||
|
|
||||||
(comment
|
(comment
|
||||||
|
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure.
|
||||||
|
|
||||||
(def string (parser :string))
|
This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing.
|
||||||
|
|
||||||
)
|
***
|
||||||
|
|
||||||
|
And now the `group` status has broken `quiet`
|
||||||
|
|
||||||
|
|
||||||
|
")
|
||||||
|
|
||||||
|
(defn group
|
||||||
|
([parser] (pname parser) parser)
|
||||||
|
([name parser] (fn [tokens]
|
||||||
|
(let [result (apply-parser parser tokens)
|
||||||
|
data (map :data (:data result))]
|
||||||
|
{assoc result :data data}))))
|
||||||
|
|
||||||
|
|
||||||
|
(declare expression)
|
||||||
|
|
||||||
|
(def literal (choice :literal [:nil :true :false :number :string :keyword]))
|
||||||
|
|
||||||
|
(def separator (one+ (choice :separator [:comma :newline])))
|
||||||
|
|
||||||
|
(def nls? (quiet (zero+ :nls :newline)))
|
||||||
|
|
||||||
|
(def tuple-entries (order :tuple-entries [(quiet separator) expression]))
|
||||||
|
|
||||||
|
(def tuple (order :tuple
|
||||||
|
[(quiet :lparen)
|
||||||
|
(maybe expression)
|
||||||
|
(zero+ tuple-entries)
|
||||||
|
(quiet :rparen)]))
|
||||||
|
|
||||||
|
(def expression (choice :expression [tuple literal]))
|
||||||
|
|
||||||
|
(def foo (order :foo [:number :keyword]))
|
||||||
|
|
||||||
|
(def eg (:tokens (scan/scan "(1, 2, 3)")))
|
||||||
|
|
||||||
|
(def result (apply-parser tuple eg))
|
||||||
|
|
||||||
|
result
|
||||||
|
|
||||||
|
(defn clean [node]
|
||||||
|
(if (map? node)
|
||||||
|
(-> node
|
||||||
|
(dissoc
|
||||||
|
:status
|
||||||
|
:remaining
|
||||||
|
:token)
|
||||||
|
(update :data #(map clean %)))
|
||||||
|
node))
|
||||||
|
|
||||||
|
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)
|
||||||
|
|
||||||
|
(def my-data (-> result clean tap))
|
||||||
|
|
||||||
|
my-data
|
||||||
|
|
||||||
|
(def my-first (-> my-data first))
|
||||||
|
|
||||||
|
(def my-sec (map :data (-> my-data second :data)))
|
||||||
|
|
||||||
|
(concat my-first my-sec)
|
|
@ -11,20 +11,20 @@
|
||||||
"cond" :cond ;; impl
|
"cond" :cond ;; impl
|
||||||
"do" :do ;; impl
|
"do" :do ;; impl
|
||||||
"else" :else ;; impl
|
"else" :else ;; impl
|
||||||
"false" :false ;; impl
|
"false" :false ;; impl -> literal word
|
||||||
"fn" :fn ;; impl
|
"fn" :fn ;; impl
|
||||||
"if" :if ;; impl
|
"if" :if ;; impl
|
||||||
"import" :import ;; impl
|
"import" :import ;; impl
|
||||||
"let" :let ;; impl
|
"let" :let ;; impl
|
||||||
"loop" :loop ;; impl
|
"loop" :loop ;; impl
|
||||||
"match" :match ;; impl
|
"match" :match ;; impl
|
||||||
"nil" :nil ;; impl
|
"nil" :nil ;; impl -> literal word
|
||||||
"ns" :ns ;; impl
|
"ns" :ns ;; impl
|
||||||
;; "panic!" :panic ;; impl (should be a function)
|
;; "panic!" :panic ;; impl (should be a function)
|
||||||
"recur" :recur ;; impl
|
"recur" :recur ;; impl
|
||||||
"ref" :ref ;; impl
|
"ref" :ref ;; impl
|
||||||
"then" :then ;; impl
|
"then" :then ;; impl
|
||||||
"true" :true ;; impl
|
"true" :true ;; impl -> literal word
|
||||||
"with" :with ;; impl
|
"with" :with ;; impl
|
||||||
|
|
||||||
;; actor model/concurrency
|
;; actor model/concurrency
|
||||||
|
@ -42,6 +42,12 @@
|
||||||
;; "module" :module ;; not necessary if we don't have datatypes
|
;; "module" :module ;; not necessary if we don't have datatypes
|
||||||
})
|
})
|
||||||
|
|
||||||
|
(def literal-words {
|
||||||
|
"true" true
|
||||||
|
"false" false
|
||||||
|
"nil" nil
|
||||||
|
})
|
||||||
|
|
||||||
(defn- new-scanner
|
(defn- new-scanner
|
||||||
"Creates a new scanner."
|
"Creates a new scanner."
|
||||||
[source]
|
[source]
|
||||||
|
@ -195,7 +201,9 @@
|
||||||
word (str char)]
|
word (str char)]
|
||||||
(let [curr (current-char scanner)]
|
(let [curr (current-char scanner)]
|
||||||
(cond
|
(cond
|
||||||
(terminates? curr) (add-token scanner (get reserved-words word :word))
|
(terminates? curr) (add-token scanner
|
||||||
|
(get reserved-words word :word)
|
||||||
|
(get literal-words word :none))
|
||||||
(word-char? curr) (recur (advance scanner) (str word curr))
|
(word-char? curr) (recur (advance scanner) (str word curr))
|
||||||
:else (add-error scanner (str "Unexpected " curr " after word " word "."))))))
|
:else (add-error scanner (str "Unexpected " curr " after word " word "."))))))
|
||||||
|
|
||||||
|
|
47
tokens
Normal file
47
tokens
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
TOKENS:
|
||||||
|
|
||||||
|
:nil
|
||||||
|
:true
|
||||||
|
:false
|
||||||
|
:word
|
||||||
|
:keyword
|
||||||
|
:number
|
||||||
|
:string
|
||||||
|
|
||||||
|
:as
|
||||||
|
:cond
|
||||||
|
:do
|
||||||
|
:else
|
||||||
|
:fn
|
||||||
|
:if
|
||||||
|
:import
|
||||||
|
:let
|
||||||
|
:loop
|
||||||
|
:ref
|
||||||
|
:then
|
||||||
|
:with
|
||||||
|
|
||||||
|
:receive
|
||||||
|
:spawn
|
||||||
|
:repeat
|
||||||
|
:test
|
||||||
|
:when
|
||||||
|
|
||||||
|
:lparen
|
||||||
|
:rparen
|
||||||
|
:lbrace
|
||||||
|
:rbrace
|
||||||
|
:lbracket
|
||||||
|
:rbracket
|
||||||
|
:semicolon
|
||||||
|
:comma
|
||||||
|
:newline
|
||||||
|
:backslash
|
||||||
|
:equals
|
||||||
|
:pipeline
|
||||||
|
:rarrow
|
||||||
|
:startdict
|
||||||
|
:startstruct
|
||||||
|
:startset
|
||||||
|
:splat
|
||||||
|
:eof
|
Loading…
Reference in New Issue
Block a user