diff --git a/src/instaparse/gll.cljc b/src/instaparse/gll.cljc index 0290128..7f168dd 100644 --- a/src/instaparse/gll.cljc +++ b/src/instaparse/gll.cljc @@ -108,7 +108,7 @@ ; In diagnostic messages, how many characters ahead do we want to show. (def ^:dynamic *diagnostic-char-lookahead* 10) -(declare sub-sequence string-context) +(declare sub-sequence string-context merge-meta) #?(:clj (defn string-context [^CharSequence text index] @@ -218,6 +218,22 @@ :cljs (def sub-sequence subs)) +(defn with-path-meta + [g] + (let [gfn (fn gfn [root path] + (vary-meta + (if (:parser root) + (assoc root + :parser (gfn (:parser root) (conj path (:tag root)))) + (if (:parsers root) + (assoc root + :parsers (map-indexed #(gfn %2 (conj path (:tag root) %1)) + (:parsers root))) + root)) + assoc :path path))] + (into {} (for [[nt exp] g] + [nt (gfn exp [nt])])))) + ; The trampoline structure contains the grammar, text to parse, a stack and a nodes ; Also contains an atom to hold successes and one to hold index of failure point. ; grammar is a map from non-terminals to parsers @@ -229,16 +245,18 @@ (defrecord Tramp [grammar text segment fail-index node-builder stack next-stack generation negative-listeners - msg-cache nodes success failure trace?]) + msg-cache nodes success failure trace? + path-log]) (defn make-tramp ([grammar text] (make-tramp grammar text (text->segment text) -1 nil)) ([grammar text segment] (make-tramp grammar text segment -1 nil)) ([grammar text fail-index node-builder] (make-tramp grammar text (text->segment text) fail-index node-builder)) ([grammar text segment fail-index node-builder] - (Tramp. grammar text segment + (Tramp. (with-path-meta grammar) text segment fail-index node-builder (atom []) (atom []) (atom 0) (atom (sorted-map-by >)) - (atom {}) (atom {}) (atom nil) (atom (Failure. 0 [])) (trace-or-false)))) + (atom {}) (atom {}) (atom nil) (atom (Failure. 0 [])) (trace-or-false) + (atom [])))) ; A Success record contains the result and the index to continue from (defn make-success [result index] {:result result :index index}) @@ -360,6 +378,8 @@ total? (total-success? tramp result) results (if total? (:full-results node) (:results node))] (when (not (@results result)) ; when result is not already in @results + (when-let [path (:path (meta parser))] + (swap! (:path-log tramp) conj path)) (profile (add! :push-result)) (swap! results conj result) (doseq [listener @(:listeners node)] @@ -457,7 +477,10 @@ (cond @(:success tramp) (do (log tramp "Successful parse.\nProfile: " @stats) - (cons (:result @(:success tramp)) + (cons (let [obj (:result @(:success tramp))] + (if (coll? obj) + (merge-meta obj {:path-log @(:path-log tramp)}) + obj)) (lazy-seq (do (reset! (:success tramp) nil) (run tramp true))))) diff --git a/test/instaparse/core_test.cljc b/test/instaparse/core_test.cljc index a8ad3c5..ae59728 100644 --- a/test/instaparse/core_test.cljc +++ b/test/instaparse/core_test.cljc @@ -793,12 +793,12 @@ (defn hiccup-line-col-spans [t] (if (sequential? t) - (cons (meta t) (map hiccup-line-col-spans (next t))) + (cons (dissoc (meta t) :path-log) (map hiccup-line-col-spans (next t))) t)) (defn enlive-line-col-spans [t] (if (map? t) - (cons (meta t) (map enlive-line-col-spans (:content t))) + (cons (dissoc (meta t) :path-log) (map enlive-line-col-spans (:content t))) t)) (deftest line-col-test diff --git a/test/instaparse/path_log.cljc b/test/instaparse/path_log.cljc new file mode 100644 index 0000000..6926819 --- /dev/null +++ b/test/instaparse/path_log.cljc @@ -0,0 +1,93 @@ +(ns instaparse.path-log + (:require + #?(:clj [clojure.test :refer [deftest is]] + :cljs [cljs.test :as t]) + #?(:clj [instaparse.core :as insta] + :cljs [instaparse.core :as insta])) + #?(:cljs (:require-macros + [cljs.test :refer [is deftest]]))) + +(def simple-parser + (insta/parser + "TOP = R1+ \"\\n\" + R1 = ( 'foo' | 'bar' | R3 )+ + | 'baz' + | R2 + R2 = 'qux' + | 'quux' + | 'quuux' + R3 = 'z'")) + +(def text1 + "foo\n") + +(def text2 + "barquuxz\n") + +(def text3 + "quxquxquuxquuxquuuxquuux\n") + +(deftest path-log-tests + (let [res1 (simple-parser text1) + path-freqs1 (->> res1 meta :path-log frequencies) + res2 (simple-parser text2) + path-freqs2 (->> res2 meta :path-log frequencies) + res3 (simple-parser text3) + path-freqs3 (->> res3 meta :path-log frequencies)] + + (is (= res1 + [:TOP + [:R1 "foo"] + "\n"])) + (is (= path-freqs1 + {[:TOP] 1, + [:TOP :cat 0] 1, + [:TOP :cat 0 :plus] 1, + [:TOP :cat 1] 1, + [:R1] 1, + [:R1 :alt 0] 1, + [:R1 :alt 0 :plus] 1, + [:R1 :alt 0 :plus :alt 0] 1})) + + (is (= res2 + [:TOP + [:R1 "bar"] + [:R1 [:R2 "quux"]] + [:R1 [:R3 "z"]] + "\n"])) + (is (= path-freqs2 + {[:TOP] 1, + [:TOP :cat 0] 3, + [:TOP :cat 0 :plus] 3, + [:TOP :cat 1] 1, + [:R1] 3, + [:R1 :alt 0] 2, + [:R1 :alt 0 :plus] 2, + [:R1 :alt 0 :plus :alt 1] 1, + [:R1 :alt 0 :plus :alt 2] 1, + [:R1 :alt 2] 1, + [:R2] 1, + [:R2 :alt 1] 1, + [:R3] 1})) + + (is (= res3 + [:TOP + [:R1 [:R2 "qux"]] + [:R1 [:R2 "qux"]] + [:R1 [:R2 "quux"]] + [:R1 [:R2 "quux"]] + [:R1 [:R2 "quuux"]] + [:R1 [:R2 "quuux"]] + "\n"])) + (is (= path-freqs3 + {[:TOP] 1, + [:TOP :cat 0] 6, + [:TOP :cat 0 :plus] 6, + [:TOP :cat 1] 1, + [:R1] 6, + [:R1 :alt 2] 6, + [:R2] 6, + [:R2 :alt 0] 2, + [:R2 :alt 1] 2, + [:R2 :alt 2] 2})))) +