parseclj-parser.el (12314B)
1 ;;; parseclj-parser.el --- Clojure/EDN parser -*- lexical-binding: t; -*- 2 3 ;; Copyright (C) 2017-2021 Arne Brasseur 4 5 ;; Author: Arne Brasseur <arne@arnebrasseur.net> 6 7 ;; This file is not part of GNU Emacs. 8 9 ;; This file is free software; you can redistribute it and/or modify 10 ;; it under the terms of the GNU General Public License as published by 11 ;; the Free Software Foundation; either version 3, or (at your option) 12 ;; any later version. 13 14 ;; This file is distributed in the hope that it will be useful, 15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 ;; GNU General Public License for more details. 18 19 ;; You should have received a copy of the GNU General Public License 20 ;; along with GNU Emacs; see the file COPYING. If not, write to 21 ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 22 ;; Boston, MA 02110-1301, USA. 23 24 ;;; Commentary: 25 26 ;; A shift/reduce parser for Clojure source. 27 28 ;;; Code: 29 30 (require 'cl-lib) 31 (require 'subr-x) 32 (require 'parseclj-lex) 33 (require 'parseclj-alist) 34 35 (define-error 'parseclj-parser-error "parseclj: Syntax error") 36 37 (defun parseclj--error (format &rest args) 38 "Signal a parse error. 39 Takes a FORMAT string and optional ARGS to be passed to 40 `format-message'. Signals a 'parseclj-parser-error signal, which 41 can be handled with `condition-case'." 42 (signal 'parseclj-parser-error (list (apply #'format-message format args)))) 43 44 (defun parseclj--find-opening-token (stack closing-token) 45 "Scan STACK for an opening-token matching CLOSING-TOKEN." 46 (let ((token-type (parseclj-lex-token-type closing-token))) 47 (cond 48 ((eq :rparen token-type) (parseclj-lex-token-type 49 (seq-find (lambda (token) 50 (member (parseclj-lex-token-type token) 51 '(:lparen :lambda))) 52 stack))) 53 ((eq :rbracket token-type) :lbracket) 54 ((eq :rbrace token-type) (parseclj-lex-token-type 55 (seq-find (lambda (token) 56 (member (parseclj-lex-token-type token) 57 '(:lbrace :set))) 58 stack)))))) 59 60 (defun parseclj--reduce-coll (stack closing-token reduce-branch options) 61 "Reduce collection based on the top of the STACK and a CLOSING-TOKEN. 62 63 REDUCE-BRANCH is a function to be applied to the collection of tokens found 64 from the top of the stack until an opening token that matches 65 CLOSING-TOKEN. This function should return an AST token representing such 66 collection. 67 68 OPTIONS is an association list. This list is also passed down to the 69 REDUCE-BRANCH function. See `parseclj-parser' for more information on 70 available options." 71 (let ((opening-token-type (parseclj--find-opening-token stack closing-token)) 72 (fail-fast (map-elt options :fail-fast t)) 73 (collection nil)) 74 (if (not opening-token-type) 75 (if fail-fast 76 (parseclj--error "At position %s, unmatched %S" 77 (map-elt closing-token :pos) 78 (parseclj-lex-token-type closing-token)) 79 80 stack) 81 82 (progn 83 ;; unwind the stack until opening-token-type is found, adding to collection 84 (while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type))) 85 (push (pop stack) collection)) 86 87 ;; did we find the right token? 88 (if (eq (parseclj-lex-token-type (car stack)) opening-token-type) 89 (progn 90 (when fail-fast 91 ;; any unreduced tokens left: bail early 92 (when-let ((token (seq-find #'parseclj-lex-token-p collection))) 93 (parseclj--error "At position %s, unmatched %S" 94 (map-elt token :pos) 95 (parseclj-lex-token-type token)))) 96 97 ;; all good, call the reducer so it can return an updated stack with a 98 ;; new node at the top. 99 (let ((opening-token (pop stack))) 100 (funcall reduce-branch stack opening-token collection options))) 101 102 ;; Unwound the stack without finding a matching paren: either bail early 103 ;; or return the original stack and continue parsing 104 (if fail-fast 105 (parseclj--error "At position %s, unmatched %S" 106 (map-elt closing-token :pos) 107 (parseclj-lex-token-type closing-token)) 108 109 (reverse collection))))))) 110 111 (defun parseclj--take-value (stack value-p) 112 "Scan STACK until a value is found. 113 Return everything up to the value in reversed order (meaning the value 114 comes first in the result). 115 116 STACK is the current parse stack to scan. 117 118 VALUE-P a predicate to distinguish reduced values from non-values (tokens 119 and whitespace)." 120 (let ((result nil)) 121 (cl-block nil 122 (while stack 123 (cond 124 ((parseclj-lex-token-p (car stack)) 125 (cl-return nil)) 126 127 ((funcall value-p (car stack)) 128 (cl-return (cons (car stack) result))) 129 130 (t 131 (push (pop stack) result))))))) 132 133 (defun parseclj--take-token (stack value-p token-types) 134 "Scan STACK until a token of a certain type is found. 135 Returns nil if a value is encountered before a matching token is found. 136 Return everything up to the token in reversed order (meaning the token 137 comes first in the result). 138 139 STACK is the current parse stack to scan. 140 141 VALUE-P a predicate to distinguish reduced values from non-values (tokens 142 and whitespace). 143 144 TOKEN-TYPES are the token types to look for." 145 (let ((result nil)) 146 (cl-block nil 147 (while stack 148 (cond 149 ((member (parseclj-lex-token-type (car stack)) token-types) 150 (cl-return (cons (car stack) result))) 151 ((funcall value-p (car stack)) 152 (cl-return nil)) 153 ((parseclj-lex-token-p (car stack)) 154 (cl-return nil)) 155 (t 156 (push (pop stack) result))))))) 157 158 (defun parseclj-single-value-p (stack value-p) 159 "Return t if STACK only has a single node for which VALUE-P is true. 160 161 This checks if the stack contains a single, fully reduced value, and no 162 dangling unmatched tokens. When parsing with `:read-one' this indicates a 163 form can be returned." 164 (and (not (cl-reduce (lambda (bool node) 165 (or bool (parseclj-lex-token-p node))) 166 stack 167 :initial-value nil)) 168 (parseclj--take-value stack value-p))) 169 170 (defun parseclj-parser (reduce-leaf reduce-branch &optional options) 171 "Clojure/EDN stack-based shift-reduce parser. 172 173 REDUCE-LEAF does reductions for leaf nodes. It is a function that takes 174 the current value of the stack and a token, and either returns an updated 175 stack, with a new leaf node at the top (front), or returns the stack 176 unmodified. 177 178 REDUCE-BRANCH does reductions for branch nodes. It is a function that 179 takes the current value of the stack, the type of branch node to create, 180 and a list of child nodes, and returns an updated stack, with the new node 181 at the top (front). 182 183 What \"node\" means in this case is up to the reducing functions, it could 184 be AST nodes (as in the case of `parseclj-parser-clojure'), or plain 185 values/sexps (as in the case of `parseedn-read'), or something else. The 186 only requirement is that they should not put raw tokens back on the stack, 187 as the parser relies on the presence or absence of these to detect parse 188 errors. 189 190 OPTIONS is an association list which is passed on to the reducing 191 functions. Additionally the following options are recognized 192 193 - `:fail-fast' 194 Raise an error when a parse error is encountered, rather than continuing 195 with a partial result. 196 - `:value-p' 197 A predicate function to differentiate values from tokens and 198 whitespace. This is needed when scanning the stack to see if any 199 reductions can be performed. By default anything that isn't a token is 200 considered a value. This can be problematic when parsing with 201 `:lexical-preservation', and which case you should provide an 202 implementation that also returns falsy for :whitespace, :comment, and 203 :discard AST nodes. 204 - `:tag-readers' 205 An association list that describes tag handler functions for any possible 206 tag. This options in only available in `parseedn-read', for more 207 information, please refer to its documentation. 208 - `:read-one' 209 Return as soon as a single complete value has been read." 210 (let ((fail-fast (map-elt options :fail-fast t)) 211 (read-one (map-elt options :read-one)) 212 (value-p (map-elt options :value-p (lambda (e) (not (parseclj-lex-token-p e))))) 213 (stack nil) 214 (token (parseclj-lex-next))) 215 216 (while (not (or (and read-one (parseclj-single-value-p stack value-p)) 217 (eq (parseclj-lex-token-type token) :eof))) 218 ;; (message "STACK: %S" stack) 219 ;; (message "TOKEN: %S\n" token) 220 221 (when (and fail-fast (parseclj-lex-error-p token)) 222 (parseclj--error "Invalid token at %s: %S" 223 (map-elt token :pos) 224 (parseclj-lex-token-form token))) 225 226 ;; Reduce based on the top item on the stack (collections) 227 (cond 228 ((parseclj-lex-leaf-token-p token) 229 (setf stack (funcall reduce-leaf stack token options))) 230 231 ((parseclj-lex-closing-token-p token) 232 (setf stack (parseclj--reduce-coll stack token reduce-branch options))) 233 234 (t (push token stack))) 235 236 ;; Reduce based on top two items on the stack (special prefixed elements) 237 (let* ((top-value (parseclj--take-value stack value-p)) 238 (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)) 239 new-stack) 240 (while (and top-value opening-token) 241 ;; (message "Reducing...") 242 ;; (message " - STACK %S" stack) 243 ;; (message " - OPENING-TOKEN %S" opening-token) 244 ;; (message " - TOP-VALUE %S" top-value) 245 (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack)) 246 (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options)) 247 248 ;; recur 249 (setq top-value (parseclj--take-value stack value-p)) 250 (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens)))) 251 252 ;; Reduce based on top three items on the stack (metadata, namespaced maps) 253 (let* ((top-value-1 (parseclj--take-value stack value-p)) 254 (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) 255 (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) 256 (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)) 257 new-stack) 258 (while (and top-value-1 top-value-2 opening-token) 259 (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack)) 260 (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options)) 261 262 ;; recur 263 (setq top-value-1 (parseclj--take-value stack value-p)) 264 (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p)) 265 (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1) 266 (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens)))) 267 268 (setq token (parseclj-lex-next))) 269 270 ;; reduce root 271 (when fail-fast 272 (when-let ((token (seq-find #'parseclj-lex-token-p stack))) 273 (parseclj--error "At position %s, unmatched %S" 274 (map-elt token :pos) 275 (parseclj-lex-token-type token)))) 276 277 (if read-one 278 (car (parseclj--take-value stack value-p)) 279 (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1) 280 (reverse stack) 281 options))))) 282 283 (provide 'parseclj-parser) 284 ;;; parseclj-parser.el ends here