From cb7098ac4e61871d2192361b4e9e553a2404d443 Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Tue, 16 Jul 2024 19:40:40 -0400 Subject: [PATCH] start fixing off-by-one errors: script, block, tuple, args, tup-patt --- src/parser.janet | 68 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/src/parser.janet b/src/parser.janet index 4d047bb..d04e675 100644 --- a/src/parser.janet +++ b/src/parser.janet @@ -3,6 +3,9 @@ ### We still need to scan some things (import /src/scanner :as s) +# stash janet type +(def janet-type type) + (defmacro declare "Forward-declares a function name, so that it can be called in a mutually recursive manner." [& names] @@ -75,7 +78,7 @@ (has-value? terminators ttype)) # breakers are what terminate panics -(def breaking [:break :newline :semicolon :comma :eof :then :else]) +(def breaking [:break :newline :semicolon :comma :eof :then :else :arrow]) (defn- breaks? "Returns true if the current token in the parser should break a panic" @@ -89,11 +92,15 @@ [parser message] # (print "Panic in the parser: " message) (def origin (current parser)) - (advance parser) + # (advance parser) (def skipped @[origin]) (while (not (breaks? parser)) (array/push skipped (current parser)) (advance parser)) + # but we actually don't want to advance past all breaking tokens + # this is the source of the off-by-one errors + # only newlines and semicolons should be skipped here; everything else needs to be parsed normally + # wait, except that we don't actually advance past the breaking token (array/push skipped (current parser)) (def err {:type :error :data skipped :token origin :msg message}) (update parser :errors array/push err) @@ -279,8 +286,8 @@ (def origin (current parser)) (advance parser) # consume the :lparen (def ast @{:type :args :data @[] :token origin :partial false}) - (while (separates? 
parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed paren"}) (array/push (parser :errors) err) @@ -299,8 +306,7 @@ {:type :placeholder :token origin})) (capture nonbinding parser))) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -337,16 +343,16 @@ (def origin (current parser)) (advance parser) # consume the :lparen (def ast {:type :tuple :data @[] :token origin}) - (while (separates? parser) (advance parser)) # consume any separators + # (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed paren"}) (array/push (parser :errors) err) (error err)) (def term (capture nonbinding parser)) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -452,8 +458,8 @@ (def origin (current parser)) (advance parser) # consume the :lparen (def ast {:type :tuple :data @[] :token origin}) - (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed paren"}) (array/push (parser :errors) err) @@ -466,8 +472,7 @@ {:type :splat :data splatted :token origin}) (capture pattern parser))) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -573,9 +578,12 @@ (array/push (ast :data) (capture nonbinding parser)) ast) +(defn- literal-terminator? 
[token] + (def tok-type (token :type)) + (or (= :newline tok-type) (= :semicolon tok-type))) + (defn- terminator [parser] (if-not (terminates? parser) - # this line panics, captures the panic, advances the parser, and re-throws the error; solves an off-by-one error (panic parser "expected terminator")) (advance parser) (while (terminates? parser) (advance parser))) @@ -798,13 +806,13 @@ (defn- block [parser] (def origin (current parser)) (expect parser :lbrace) (advance parser) - (accept-many parser ;terminators) (def data @[]) (while (not (check parser :rbrace)) + (accept-many parser ;terminators) (if (check parser :eof) (error {:type :error :token origin :data data :msg "unclosed brace"})) (array/push data (capture expr parser)) - (terminator parser)) + (capture terminator parser)) (advance parser) {:type :block :data data :token origin}) @@ -1103,7 +1111,7 @@ (def origin (current parser)) (def lines @[]) (while (not (check parser :eof)) - (accept-many parser :newline) + (accept-many parser ;terminators) (array/push lines (capture toplevel parser)) (capture terminator parser)) {:type :script :data lines :token origin}) @@ -1114,13 +1122,35 @@ (set (parser :ast) ast) parser) -# (do -(comment +(defn- pp-tok [token] + (if (not token) (break "nil")) + (def {:line line :lexeme lex :type type :start start} token) + (string "<" line "[" start "]" ": " type ": " lex ">")) + +(defn- pp-ast [ast &opt indent] + (default indent 0) + (def {:token token :data data :type type} ast) + (def pretty-tok (pp-tok token)) + (def data-rep (if (= :array (janet-type data)) + (string "[\n" + (string/join (map (fn [x] (pp-ast x (inc indent))) data) + (string (string/repeat " " indent) "\n")) + "\n" (string/repeat " " indent) "]") + data + )) + (string (string/repeat " " indent) type ": " pretty-tok " " data-rep) +) + +(do +# (comment (def source ` -[...x] +(,,,,,,1, 2, foo, :three) + `) (def scanned (s/scan source)) # (print "\n***NEW PARSE***\n") (def a-parser (new-parser scanned)) -(try 
(def parsed (pattern a-parser)) ([e] (pp e))) +(def parsed (script a-parser)) +(pp (map (fn [err] (err :msg)) ((parse scanned) :errors))) +(print (pp-ast ((parse scanned) :ast))) )