Compare commits

..

3 Commits

Author SHA1 Message Date
Scott Richmond
f3778792b3 parse interpolated strings/string patterns 2024-05-09 18:30:13 -04:00
Scott Richmond
248e424993 moar bugfixes 2024-05-09 18:29:51 -04:00
Scott Richmond
3f16e45204 fix escaping brace bug, which was fixing next-char bug; also clean some stuff up 2024-05-09 16:35:22 -04:00
2 changed files with 89 additions and 24 deletions

View File

@ -1,5 +1,8 @@
### A recursive descent parser for Ludus
### We still need to scan some things
(os/cd "janet") # when in repl to do relative imports
(import ./scanner :as s)
### First, some mutual recursion helpers
(defn unreachable
@ -171,6 +174,76 @@
(advance parser)
{:type :string :data (curr :literal) :token curr})
# interpolated strings, which are a whole other scene
# Resolve one piece of an interpolated string.
# `data` is either a buffer (literal text, returned untouched) or a struct
# holding the raw text of an interpolation under :to-scan. The raw text is run
# through the scanner; the result is either the scanner's first error, a :word
# node built from the first token, or a "single words" error node.
# NOTE(review): the print/pp calls look like debugging output — confirm whether
# they should stay.
(defn- scan-interpolations [data]
  (print "scanning interpolation: " data)
  (if (buffer? data)
    # literal text needs no scanning
    data
    (do
      (pp data)
      (def scanned (s/scan (data :to-scan)))
      (def tokens (get scanned :tokens))
      (def errors (get scanned :errors))
      (pp tokens)
      (print "there are " (length tokens) " tokens")
      (def head (first tokens))
      (def not-a-word
        {:type :error :msg "string interpolations/patterns must be single words"})
      (cond
        # scanner errors take priority over anything else
        (first errors) (first errors)
        (empty? tokens) not-a-word
        # more than three tokens is rejected outright
        (< 3 (length tokens)) not-a-word
        (= :word (head :type)) {:type :word :data (head :lexeme) :token head}
        not-a-word))))
# NOTE(review): looks like leftover REPL scratch — `foo` is not referenced by
# any other code visible here; confirm before removing.
(def foo [{:foo :bar}])
# Threads `foo` through `first`, then `(get <> :foo)`; the result is discarded.
(-> foo first (get :foo))
# True when `data` is an error node, i.e. a non-buffer whose :type is :error.
# Buffers (literal string pieces) are never errors.
(defn- is-error? [data]
  (if (buffer? data)
    false
    (= :error (data :type))))
# Parse an :interpolated token into an :interpolated AST node.
# The token's :literal is walked byte by byte and split into pieces:
#   * buffers holding literal text, and
#   * {:to-scan <buffer>} structs holding the text found between `{` and `}`.
# Each piece is then passed through `scan-interpolations`. If any resulting
# piece is an error node, an error node wrapping the AST is pushed onto
# (parser :errors) and returned; otherwise the AST is returned.
# Escapes: a backslash followed by `{` yields a literal `{`; a backslash
# followed by anything else is kept verbatim together with that character.
(defn- interpolated [parser]
  (expect parser :interpolated)
  (def origin (current parser))
  (def source (origin :literal))
  (def data @[])
  (var curr @"")
  (var interp? false)
  (var escape? false)
  (each code source
    (def char (string/from-bytes code))
    (cond
      (= char "\\") (do
        # A backslash arriving while one is already pending must not swallow
        # the pending one: emit it as literal text, then stay armed so the
        # last backslash of a run is still the one that can escape a brace.
        (when escape? (buffer/push curr "\\"))
        (set escape? true))
      escape? (if (= char "{")
        (do
          (buffer/push curr "{")
          (set escape? false))
        (do
          (buffer/push curr "\\")
          (buffer/push curr char)
          (set escape? false)))
      (= char "{") (do
        (set interp? true)
        (array/push data curr)
        (set curr @""))
      (= char "}") (if interp?
        (do
          (set interp? false)
          (array/push data {:to-scan curr})
          (set curr @""))
        # a closing brace outside an interpolation is ordinary text
        (buffer/push curr char))
      :else (buffer/push curr char)))
  # A literal ending in a backslash leaves the escape pending; keep that
  # backslash instead of silently dropping it.
  (when escape? (buffer/push curr "\\"))
  (array/push data curr)
  # named `parts` so the local does not shadow this function's own name
  (def parts (map scan-interpolations data))
  (advance parser)
  (def ast {:type :interpolated :data parts :token origin})
  (if (some is-error? parts)
    (do
      (def err {:type :error :msg "bad interpolated string" :data ast :token origin})
      (array/push (parser :errors) err)
      err)
    ast))
# words & synthetic expressions
# Tuple of three keywords; presumably the token types treated as separators
# between expressions — TODO confirm against the code that reads `separates`.
(def separates [:break :newline :comma])
@ -338,7 +411,7 @@
:true (bool parser)
:false (bool parser)
:keyword (kw parser)
:number (number parser)
:number (num parser)
:string (str parser)
:word (word-pattern parser)
:placeholder (placeholder parser)
@ -434,6 +507,7 @@
:number (num parser)
:keyword (kw parser)
:string (str parser)
:interpolated (interpolated parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
@ -516,7 +590,7 @@
)
(defrec toplevel [parser]
(def when (current parser))
(def curr (current parser))
(case (type curr)
:pkg (unreachable)
:ns (unreachable)
@ -547,16 +621,12 @@
)
)
(os/cd "janet") # when repl to do relative imports
(import ./scanner :as s)
(do
#(comment
(def source `"foo {bar} \{baz"`)
(def source `"foo { bar } baz \{quux} {fuzz}"`)
(def scanned (s/scan source))
# (def a-parser (new-parser scanned))
# (def parsed (whenn a-parser))
# (-> parsed)
(first (scanned :tokens))
(def a-parser (new-parser scanned))
(def parsed (simple a-parser))
(-> parsed)
)

View File

@ -71,7 +71,7 @@
length (length source)]
(if (>= next length)
nil
(string/from-bytes (get source current)))))
(string/from-bytes (get source next)))))
(defn- current-lexeme
[scanner]
@ -126,7 +126,6 @@
"&" true
"," true
">" true
# nil was in here; I don't want to be passing in literal nil to this function anyway, and "nil" is also not it...
"\"" true})
(defn- terminates? [c]
@ -171,9 +170,7 @@
# Parse `lit` with Janet's `parse-all` and return the first parsed form.
(defn- read-literal [lit]
  (first (parse-all lit)))
## TODO: improve number parsing?
## Currently this uses Clojure's number formatting rules (since we use the EDN reader)
## These rules are here: https://cljs.github.io/api/syntax/number
### TODO: consider whether Janet's number rules are right for Ludus
(defn- add-number [char scanner]
(defn recur [scanner num float?]
(let [curr (current-char scanner)]
@ -189,7 +186,6 @@
(defn- add-string
[scanner]
(print "Adding string")
(defn recur [scanner buff interpolate?]
(let [char (current-char scanner)]
(case char
@ -197,14 +193,13 @@
# allow multiline strings
"\n" (recur (update (advance scanner) :line inc) (buffer/push buff char) interpolate?)
"\"" (add-token (advance scanner) (if interpolate? :interpolated :string)(string buff))
### FIXME: Actually add the escaped character to the string;
### The only weird escapy-thing is actually the lbrace
### So only do anything fancy if the next char is "{"
"\\" (let [next (next-char scanner)
scanner (if (= next "\n")
(update scanner :line inc)
scanner)]
(recur (advance (advance scanner)) (buffer/push buff next) interpolate?))
"\\" (let [next (next-char scanner)]
(if (= next "{")
(do
(buffer/push buff char)
(buffer/push buff next)
(recur (advance (advance scanner)) buff interpolate?))
(recur (advance scanner) (buffer/push buff char) interpolate?)))
(if (at-end? scanner)
(add-error scanner "Unterminated string.")
(recur (advance scanner) (buffer/push buff char) interpolate?)))))