dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

parseclj-parser.el (12314B)


      1 ;;; parseclj-parser.el --- Clojure/EDN parser              -*- lexical-binding: t; -*-
      2 
      3 ;; Copyright (C) 2017-2021  Arne Brasseur
      4 
      5 ;; Author: Arne Brasseur <arne@arnebrasseur.net>
      6 
      7 ;; This file is not part of GNU Emacs.
      8 
      9 ;; This file is free software; you can redistribute it and/or modify
     10 ;; it under the terms of the GNU General Public License as published by
     11 ;; the Free Software Foundation; either version 3, or (at your option)
     12 ;; any later version.
     13 
     14 ;; This file is distributed in the hope that it will be useful,
     15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 ;; GNU General Public License for more details.
     18 
     19 ;; You should have received a copy of the GNU General Public License
     20 ;; along with GNU Emacs; see the file COPYING.  If not, write to
     21 ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     22 ;; Boston, MA 02110-1301, USA.
     23 
     24 ;;; Commentary:
     25 
     26 ;; A shift/reduce parser for Clojure source.
     27 
     28 ;;; Code:
     29 
     30 (require 'cl-lib)
     31 (require 'subr-x)
     32 (require 'parseclj-lex)
     33 (require 'parseclj-alist)
     34 
     35 (define-error 'parseclj-parser-error "parseclj: Syntax error")
     36 
     37 (defun parseclj--error (format &rest args)
     38   "Signal a parse error.
     39 Takes a FORMAT string and optional ARGS to be passed to
     40 `format-message'.  Signals a 'parseclj-parser-error signal, which
     41 can be handled with `condition-case'."
     42   (signal 'parseclj-parser-error (list (apply #'format-message format args))))
     43 
     44 (defun parseclj--find-opening-token (stack closing-token)
     45   "Scan STACK for an opening-token matching CLOSING-TOKEN."
     46   (let ((token-type (parseclj-lex-token-type closing-token)))
     47     (cond
     48      ((eq :rparen token-type) (parseclj-lex-token-type
     49                                (seq-find (lambda (token)
     50                                            (member (parseclj-lex-token-type token)
     51                                                    '(:lparen :lambda)))
     52                                          stack)))
     53      ((eq :rbracket token-type) :lbracket)
     54      ((eq :rbrace token-type) (parseclj-lex-token-type
     55                                (seq-find (lambda (token)
     56                                            (member (parseclj-lex-token-type token)
     57                                                    '(:lbrace :set)))
     58                                          stack))))))
     59 
     60 (defun parseclj--reduce-coll (stack closing-token reduce-branch options)
     61   "Reduce collection based on the top of the STACK and a CLOSING-TOKEN.
     62 
     63 REDUCE-BRANCH is a function to be applied to the collection of tokens found
     64 from the top of the stack until an opening token that matches
     65 CLOSING-TOKEN.  This function should return an AST token representing such
     66 collection.
     67 
     68 OPTIONS is an association list.  This list is also passed down to the
     69 REDUCE-BRANCH function.  See `parseclj-parser' for more information on
     70 available options."
     71   (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
     72         (fail-fast (map-elt options :fail-fast t))
     73         (collection nil))
     74     (if (not opening-token-type)
     75         (if fail-fast
     76             (parseclj--error "At position %s, unmatched %S"
     77                              (map-elt closing-token :pos)
     78                              (parseclj-lex-token-type closing-token))
     79 
     80           stack)
     81 
     82       (progn
     83         ;; unwind the stack until opening-token-type is found, adding to collection
     84         (while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type)))
     85           (push (pop stack) collection))
     86 
     87         ;; did we find the right token?
     88         (if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
     89             (progn
     90               (when fail-fast
     91                 ;; any unreduced tokens left: bail early
     92                 (when-let ((token (seq-find #'parseclj-lex-token-p collection)))
     93                   (parseclj--error "At position %s, unmatched %S"
     94                                    (map-elt token :pos)
     95                                    (parseclj-lex-token-type token))))
     96 
     97               ;; all good, call the reducer so it can return an updated stack with a
     98               ;; new node at the top.
     99               (let ((opening-token (pop stack)))
    100                 (funcall reduce-branch stack opening-token collection options)))
    101 
    102           ;; Unwound the stack without finding a matching paren: either bail early
    103           ;; or return the original stack and continue parsing
    104           (if fail-fast
    105               (parseclj--error "At position %s, unmatched %S"
    106                                (map-elt closing-token :pos)
    107                                (parseclj-lex-token-type closing-token))
    108 
    109             (reverse collection)))))))
    110 
    111 (defun parseclj--take-value (stack value-p)
    112   "Scan STACK until a value is found.
    113 Return everything up to the value in reversed order (meaning the value
    114 comes first in the result).
    115 
    116 STACK is the current parse stack to scan.
    117 
    118 VALUE-P a predicate to distinguish reduced values from non-values (tokens
    119 and whitespace)."
    120   (let ((result nil))
    121     (cl-block nil
    122       (while stack
    123         (cond
    124          ((parseclj-lex-token-p (car stack))
    125           (cl-return nil))
    126 
    127          ((funcall value-p (car stack))
    128           (cl-return (cons (car stack) result)))
    129 
    130          (t
    131           (push (pop stack) result)))))))
    132 
    133 (defun parseclj--take-token (stack value-p token-types)
    134   "Scan STACK until a token of a certain type is found.
    135 Returns nil if a value is encountered before a matching token is found.
    136 Return everything up to the token in reversed order (meaning the token
    137 comes first in the result).
    138 
    139 STACK is the current parse stack to scan.
    140 
    141 VALUE-P a predicate to distinguish reduced values from non-values (tokens
    142 and whitespace).
    143 
    144 TOKEN-TYPES are the token types to look for."
    145   (let ((result nil))
    146     (cl-block nil
    147       (while stack
    148         (cond
    149          ((member (parseclj-lex-token-type (car stack)) token-types)
    150           (cl-return (cons (car stack) result)))
    151          ((funcall value-p (car stack))
    152           (cl-return nil))
    153          ((parseclj-lex-token-p (car stack))
    154           (cl-return nil))
    155          (t
    156           (push (pop stack) result)))))))
    157 
    158 (defun parseclj-single-value-p (stack value-p)
    159   "Return t if STACK only has a single node for which VALUE-P is true.
    160 
    161 This checks if the stack contains a single, fully reduced value, and no
    162 dangling unmatched tokens.  When parsing with `:read-one' this indicates a
    163 form can be returned."
    164   (and (not (cl-reduce (lambda (bool node)
    165                          (or bool (parseclj-lex-token-p node)))
    166                        stack
    167                        :initial-value nil))
    168        (parseclj--take-value stack value-p)))
    169 
    170 (defun parseclj-parser (reduce-leaf reduce-branch &optional options)
    171   "Clojure/EDN stack-based shift-reduce parser.
    172 
    173 REDUCE-LEAF does reductions for leaf nodes.  It is a function that takes
    174 the current value of the stack and a token, and either returns an updated
    175 stack, with a new leaf node at the top (front), or returns the stack
    176 unmodified.
    177 
    178 REDUCE-BRANCH does reductions for branch nodes.  It is a function that
    179 takes the current value of the stack, the type of branch node to create,
    180 and a list of child nodes, and returns an updated stack, with the new node
    181 at the top (front).
    182 
    183 What \"node\" means in this case is up to the reducing functions, it could
    184 be AST nodes (as in the case of `parseclj-parser-clojure'), or plain
    185 values/sexps (as in the case of `parseedn-read'), or something else. The
    186 only requirement is that they should not put raw tokens back on the stack,
    187 as the parser relies on the presence or absence of these to detect parse
    188 errors.
    189 
    190 OPTIONS is an association list which is passed on to the reducing
    191 functions. Additionally the following options are recognized
    192 
    193 - `:fail-fast'
    194   Raise an error when a parse error is encountered, rather than continuing
    195   with a partial result.
    196 - `:value-p'
    197   A predicate function to differentiate values from tokens and
    198   whitespace. This is needed when scanning the stack to see if any
    199   reductions can be performed. By default anything that isn't a token is
    200   considered a value. This can be problematic when parsing with
    201   `:lexical-preservation', and which case you should provide an
    202   implementation that also returns falsy for :whitespace, :comment, and
    203   :discard AST nodes.
    204 - `:tag-readers'
    205   An association list that describes tag handler functions for any possible
    206   tag.  This options in only available in `parseedn-read', for more
    207   information, please refer to its documentation.
    208 - `:read-one'
    209   Return as soon as a single complete value has been read."
    210   (let ((fail-fast (map-elt options :fail-fast t))
    211         (read-one (map-elt options :read-one))
    212         (value-p (map-elt options :value-p (lambda (e) (not (parseclj-lex-token-p e)))))
    213         (stack nil)
    214         (token (parseclj-lex-next)))
    215 
    216     (while (not (or (and read-one (parseclj-single-value-p stack value-p))
    217                     (eq (parseclj-lex-token-type token) :eof)))
    218       ;; (message "STACK: %S" stack)
    219       ;; (message "TOKEN: %S\n" token)
    220 
    221       (when (and fail-fast (parseclj-lex-error-p token))
    222         (parseclj--error "Invalid token at %s: %S"
    223                          (map-elt token :pos)
    224                          (parseclj-lex-token-form token)))
    225 
    226       ;; Reduce based on the top item on the stack (collections)
    227       (cond
    228        ((parseclj-lex-leaf-token-p token)
    229         (setf stack (funcall reduce-leaf stack token options)))
    230 
    231        ((parseclj-lex-closing-token-p token)
    232         (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
    233 
    234        (t (push token stack)))
    235 
    236       ;; Reduce based on top two items on the stack (special prefixed elements)
    237       (let* ((top-value (parseclj--take-value stack value-p))
    238              (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
    239              new-stack)
    240         (while (and top-value opening-token)
    241           ;; (message "Reducing...")
    242           ;; (message "  - STACK %S" stack)
    243           ;; (message "  - OPENING-TOKEN %S" opening-token)
    244           ;; (message "  - TOP-VALUE %S" top-value)
    245           (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
    246           (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))
    247 
    248           ;; recur
    249           (setq top-value (parseclj--take-value stack value-p))
    250           (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))
    251 
    252       ;; Reduce based on top three items on the stack (metadata, namespaced maps)
    253       (let* ((top-value-1 (parseclj--take-value stack value-p))
    254              (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
    255              (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
    256                                                              (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
    257              new-stack)
    258         (while (and top-value-1 top-value-2 opening-token)
    259           (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
    260           (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))
    261 
    262           ;; recur
    263           (setq top-value-1 (parseclj--take-value stack value-p))
    264           (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
    265           (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
    266                                                                (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
    267 
    268       (setq token (parseclj-lex-next)))
    269 
    270     ;; reduce root
    271     (when fail-fast
    272       (when-let ((token (seq-find #'parseclj-lex-token-p stack)))
    273         (parseclj--error "At position %s, unmatched %S"
    274                          (map-elt token :pos)
    275                          (parseclj-lex-token-type token))))
    276 
    277     (if read-one
    278         (car (parseclj--take-value stack value-p))
    279       (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1)
    280                     (reverse stack)
    281                     options)))))
    282 
    283 (provide 'parseclj-parser)
    284 ;;; parseclj-parser.el ends here