Compare commits

...

3 Commits

Author SHA1 Message Date
Scott Richmond
f3778792b3 parse interpolated strings/string patterns 2024-05-09 18:30:13 -04:00
Scott Richmond
248e424993 moar bugfixes 2024-05-09 18:29:51 -04:00
Scott Richmond
3f16e45204 fix escaping brace bug, which was fixing next-char bug; also clean some stuff up 2024-05-09 16:35:22 -04:00
2 changed files with 89 additions and 24 deletions

View File

@ -1,5 +1,8 @@
### A recursive descent parser for Ludus ### A recursive descent parser for Ludus
### We still need to scan some things
(os/cd "janet") # when in repl to do relative imports
(import ./scanner :as s)
### First, some mutual recursion helpers ### First, some mutual recursion helpers
(defn unreachable (defn unreachable
@ -171,6 +174,76 @@
(advance parser) (advance parser)
{:type :string :data (curr :literal) :token curr}) {:type :string :data (curr :literal) :token curr})
# interpolated strings, which are a whole other scene
# Resolve one segment of an interpolated string.
#
# `data` is either a buffer of literal text, which is handed back untouched,
# or a struct whose :to-scan value holds the text found between braces.
# Brace text is run through `s/scan`, and the result is:
#   * the first scanner error, when the scanner reported any;
#   * a :word node built from the first token, when that token is a word and
#     the scan produced no more than three tokens;
#   * otherwise an :error node explaining that only single words are allowed.
#
# No debug output is written here: this runs once per segment of every
# interpolated string, so printing would flood stdout during normal parsing.
(defn- scan-interpolations [data]
  # Every rejection reports the same problem, so build the node once.
  (def single-word-error
    {:type :error :msg "string interpolations/patterns must be single words"})
  (if (buffer? data)
    # Literal text between interpolations needs no scanning.
    data
    (do
      (def {:tokens tokens :errors errors} (s/scan (data :to-scan)))
      (def first-token (first tokens))
      (cond
        (first errors) (first errors)
        (empty? tokens) single-word-error
        # More than three tokens is rejected.
        # NOTE(review): presumably the allowance covers bookkeeping tokens the
        # scanner adds around a lone word -- confirm against the scanner.
        (< 3 (length tokens)) single-word-error
        (= :word (first-token :type))
          {:type :word :data (first-token :lexeme) :token first-token}
        single-word-error))))
# NOTE(review): this looks like leftover REPL scratch -- confirm before removing.
# `foo` is bound at module level to a one-element tuple holding a struct, and
# the threaded expression below is (get (first foo) :foo), i.e. :bar; its
# value is discarded. Nothing else in the visible code refers to `foo`.
(def foo [{:foo :bar}])
(-> foo first (get :foo))
# Predicate over the segments of an interpolated string.
# A buffer (plain text) is never an error; anything else is an error exactly
# when its :type is :error. Always returns a boolean.
(defn- is-error? [data]
  (if (buffer? data)
    false
    (= :error (data :type))))
# Parse an :interpolated token into an :interpolated AST node.
#
# The token's :literal is walked byte by byte and split into segments:
# buffers of plain text, and {:to-scan <buffer>} structs for the text found
# between an unescaped "{" and the next "}". Each segment is then passed to
# `scan-interpolations`. If any resulting segment is an error (per
# `is-error?`), an :error node wrapping the AST is pushed onto the parser's
# :errors and returned instead of the AST.
(defn- interpolated [parser]
  # NOTE(review): presumably `expect` checks the current token's type -- confirm.
  (expect parser :interpolated)
  (def origin (current parser))
  (def segments @[])
  (var chunk @"")         # text accumulated for the segment in progress
  (var in-braces? false)  # true after an unescaped "{", until the next "}"
  (var escaped? false)    # true when a backslash has just been seen
  (each byte (origin :literal)
    (def ch (string/from-bytes byte))
    (cond
      # The backslash test comes first, so a run of backslashes only keeps the
      # flag raised; nothing is emitted until the next non-backslash byte.
      # NOTE(review): a backslash that ends the literal is therefore dropped.
      (= ch "\\") (set escaped? true)

      # "\{" yields a literal brace; any other escape is kept verbatim.
      escaped? (do
        (if (= ch "{")
          (buffer/push chunk "{")
          (buffer/push chunk "\\" ch))
        (set escaped? false))

      # An unescaped "{" closes the current text segment.
      (= ch "{") (do
        (array/push segments chunk)
        (set chunk @"")
        (set in-braces? true))

      # A "}" that closes an interpolation turns the chunk into scan input.
      (and (= ch "}") in-braces?) (do
        (array/push segments {:to-scan chunk})
        (set chunk @"")
        (set in-braces? false))

      # Everything else, including a "}" outside an interpolation, is text.
      (buffer/push chunk ch)))
  # The trailing chunk is always pushed as plain text.
  # NOTE(review): this includes the tail of an unclosed "{...".
  (array/push segments chunk)
  (def parts (map scan-interpolations segments))
  (advance parser)
  (def ast {:type :interpolated :data parts :token origin})
  (if-not (some is-error? parts)
    ast
    (let [err {:type :error :msg "bad interpolated string" :data ast :token origin}]
      (array/push (parser :errors) err)
      err)))
# words & synthetic expressions # words & synthetic expressions
(def separates [:break :newline :comma]) (def separates [:break :newline :comma])
@ -338,7 +411,7 @@
:true (bool parser) :true (bool parser)
:false (bool parser) :false (bool parser)
:keyword (kw parser) :keyword (kw parser)
:number (number parser) :number (num parser)
:string (str parser) :string (str parser)
:word (word-pattern parser) :word (word-pattern parser)
:placeholder (placeholder parser) :placeholder (placeholder parser)
@ -434,6 +507,7 @@
:number (num parser) :number (num parser)
:keyword (kw parser) :keyword (kw parser)
:string (str parser) :string (str parser)
:interpolated (interpolated parser)
:lparen (tup parser) :lparen (tup parser)
:lbracket (list parser) :lbracket (list parser)
:startdict (dict parser) :startdict (dict parser)
@ -516,7 +590,7 @@
) )
(defrec toplevel [parser] (defrec toplevel [parser]
(def when (current parser)) (def curr (current parser))
(case (type curr) (case (type curr)
:pkg (unreachable) :pkg (unreachable)
:ns (unreachable) :ns (unreachable)
@ -547,16 +621,12 @@
) )
) )
(os/cd "janet") # when repl to do relative imports
(import ./scanner :as s)
(do (do
#(comment #(comment
(def source `"foo {bar} \{baz"`) (def source `"foo { bar } baz \{quux} {fuzz}"`)
(def scanned (s/scan source)) (def scanned (s/scan source))
# (def a-parser (new-parser scanned)) (def a-parser (new-parser scanned))
# (def parsed (whenn a-parser)) (def parsed (simple a-parser))
# (-> parsed) (-> parsed)
(first (scanned :tokens))
) )

View File

@ -71,7 +71,7 @@
length (length source)] length (length source)]
(if (>= next length) (if (>= next length)
nil nil
(string/from-bytes (get source current))))) (string/from-bytes (get source next)))))
(defn- current-lexeme (defn- current-lexeme
[scanner] [scanner]
@ -126,7 +126,6 @@
"&" true "&" true
"," true "," true
">" true ">" true
# nil was in here; I don't want to be passing in literal nil to this function anyway, and "nil" is also not it...
"\"" true}) "\"" true})
(defn- terminates? [c] (defn- terminates? [c]
@ -171,9 +170,7 @@
(defn- read-literal [lit] (-> lit parse-all first)) (defn- read-literal [lit] (-> lit parse-all first))
## TODO: improve number parsing? ### TODO: consider whether Janet's number rules are right for Ludus
## Currently this uses Clojure's number formatting rules (since we use the EDN reader)
## These rules are here: https://cljs.github.io/api/syntax/number
(defn- add-number [char scanner] (defn- add-number [char scanner]
(defn recur [scanner num float?] (defn recur [scanner num float?]
(let [curr (current-char scanner)] (let [curr (current-char scanner)]
@ -189,7 +186,6 @@
(defn- add-string (defn- add-string
[scanner] [scanner]
(print "Adding string")
(defn recur [scanner buff interpolate?] (defn recur [scanner buff interpolate?]
(let [char (current-char scanner)] (let [char (current-char scanner)]
(case char (case char
@ -197,14 +193,13 @@
# allow multiline strings # allow multiline strings
"\n" (recur (update (advance scanner) :line inc) (buffer/push buff char) interpolate?) "\n" (recur (update (advance scanner) :line inc) (buffer/push buff char) interpolate?)
"\"" (add-token (advance scanner) (if interpolate? :interpolated :string)(string buff)) "\"" (add-token (advance scanner) (if interpolate? :interpolated :string)(string buff))
### FIXME: Actually add the escaped character to the string; "\\" (let [next (next-char scanner)]
### The only weird escapy-thing is actually the lbrace (if (= next "{")
### So only do anything fancy if the next char is "{" (do
"\\" (let [next (next-char scanner) (buffer/push buff char)
scanner (if (= next "\n") (buffer/push buff next)
(update scanner :line inc) (recur (advance (advance scanner)) buff interpolate?))
scanner)] (recur (advance scanner) (buffer/push buff char) interpolate?)))
(recur (advance (advance scanner)) (buffer/push buff next) interpolate?))
(if (at-end? scanner) (if (at-end? scanner)
(add-error scanner "Unterminated string.") (add-error scanner "Unterminated string.")
(recur (advance scanner) (buffer/push buff char) interpolate?))))) (recur (advance scanner) (buffer/push buff char) interpolate?)))))