dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

parseedn.el (10278B)


      1 ;;; parseedn.el --- Clojure/EDN parser              -*- lexical-binding: t; -*-
      2 
      3 ;; Copyright (C) 2017-2021  Arne Brasseur
      4 
      5 ;; Author: Arne Brasseur <arne@arnebrasseur.net>
      6 ;; Keywords: lisp clojure edn parser
      7 ;; Package-Requires: ((emacs "26") (parseclj "1.1.0") (map "2"))
      8 ;; Version: 1.1.0
      9 
     10 ;; This file is not part of GNU Emacs.
     11 
     12 ;; This file is free software; you can redistribute it and/or modify
     13 ;; it under the terms of the GNU General Public License as published by
     14 ;; the Free Software Foundation; either version 3, or (at your option)
     15 ;; any later version.
     16 
     17 ;; This file is distributed in the hope that it will be useful,
     18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     20 ;; GNU General Public License for more details.
     21 
     22 ;; You should have received a copy of the GNU General Public License
     23 ;; along with GNU Emacs; see the file COPYING.  If not, write to
     24 ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     25 ;; Boston, MA 02110-1301, USA.
     26 
     27 ;;; Commentary:
     28 
     29 ;; parseedn is an Emacs Lisp library for parsing EDN (Clojure) data.
     30 ;; It uses parseclj's shift-reduce parser internally.
     31 
     32 ;; EDN and Emacs Lisp have some important differences that make
     33 ;; translation from one to the other not transparent (think
     34 ;; representing an EDN map into Elisp, or being able to differentiate
     35 ;; between false and nil in Elisp). Because of this, parseedn takes
     36 ;; certain decisions when parsing and transforming EDN data into Elisp
     37 ;; data types.  For more information please refer to parseclj's design
     38 ;; documentation.
     39 
     40 ;;; Code:
     41 
     42 ;; The EDN spec is not clear about whether \u0123 and \o012 are supported in
     43 ;; strings. They are described as character literals, but not as string escape
     44 ;; codes. In practice all implementations support them (mostly with broken
     45 ;; surrogate pair support), so we do the same. Sorry, emoji 🙁.
     46 ;;
     47 ;; Note that this is kind of broken, we don't correctly detect if \u or \o forms
     48 ;; don't have the right forms.
     49 
     50 (require 'cl-lib)
     51 (require 'map)
     52 (require 'parseclj-parser)
     53 
     54 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     55 ;; Reader
     56 
     57 (defvar parseedn-default-tag-readers
     58   (list (cons 'inst (lambda (s)
     59                       (cl-list* 'edn-inst (date-to-time s))))
     60         (cons 'uuid (lambda (s)
     61                       (list 'edn-uuid s))))
     62   "Default reader functions for handling tagged literals in EDN.
     63 These are the ones defined in the EDN spec, #inst and #uuid.  It
     64 is not recommended you change this variable, as this globally
     65 changes the behavior of the EDN reader.  Instead pass your own
     66 handlers as an optional argument to the reader functions.")
     67 
     68 (defun parseedn-tagged-literal (tag form)
     69   "Construct a data representation of a tagged literal from TAG and FORM."
     70   (list 'edn-tagged-literal tag form))
     71 
     72 (defvar parseedn-default-data-reader-fn nil
     73   "The default tagged literal reader function.
     74 
     75 When no data reader is found for a tag and
     76 `parseedn-default-data-reader-fn' is non-nil, it will be called
     77 with two arguments, the tag and the value.  If
     78 `parseedn-default-data-reader-fn' is nil (the default), an
     79 exception will be thrown for the unknown tag.
     80 
     81 The default data reader can also be provided via the tagged
     82 reader options registered under the :default keyword when calling
     83 the reader functions.")
     84 
     85 (defun parseedn-reduce-leaf (stack token _options)
     86   "Put in the STACK an elisp value representing TOKEN.
     87 
     88 OPTIONS is an association list.  See `parseclj-parse' for more information
     89 on available options."
     90   (if (member (parseclj-lex-token-type token) (list :whitespace :comment))
     91       stack
     92     (cons (parseclj-lex--leaf-token-value token) stack)))
     93 
     94 (defun parseedn-reduce-branch (stack opening-token children options)
     95   "Reduce STACK with an sequence containing a collection of other elisp values.
     96 Ignores discard tokens.
     97 
     98 OPENING-TOKEN is a lex token representing an opening paren, bracket or
     99 brace.
    100 CHILDREN is a collection elisp values to be reduced into an elisp
    101 sequence.
    102 OPTIONS is an association list.  See `parseclj-parse' for more information
    103 on available options."
    104   (let ((tag-readers (parseclj-alist-merge parseedn-default-tag-readers (alist-get :tag-readers options)))
    105         (token-type (parseclj-lex-token-type opening-token)))
    106     (if (eq token-type :discard)
    107         stack
    108       (cons
    109        (cond
    110         ((eq :root token-type) children)
    111         ((eq :lparen token-type) children)
    112         ((eq :lbracket token-type) (apply #'vector children))
    113         ((eq :set token-type) (list 'edn-set children))
    114         ((eq :lbrace token-type) (let* ((kvs (seq-partition children 2))
    115                                         (hash-map (make-hash-table :test 'equal :size (length kvs))))
    116                                    (seq-do (lambda (pair)
    117                                              (puthash (car pair) (cadr pair) hash-map))
    118                                            kvs)
    119                                    hash-map))
    120         ((eq :tag token-type) (let* ((tag (intern (substring (alist-get :form opening-token) 1)))
    121                                      (reader (alist-get tag tag-readers))
    122                                      (default-reader (alist-get :default tag-readers parseedn-default-data-reader-fn)))
    123                                 (cond
    124                                  ((functionp reader)
    125                                   (funcall reader (car children)))
    126                                  ((functionp default-reader)
    127                                   (funcall default-reader tag (car children)))
    128                                  (t (user-error "No reader for tag #%S in %S" tag (map-keys tag-readers)))))))
    129        stack))))
    130 
    131 (defun parseedn-read (&optional tag-readers)
    132   "Read content from current buffer and parse it as EDN source.
    133 Returns an Emacs Lisp value.
    134 
    135 TAG-READERS is an optional association list where keys are symbols
    136 identifying *tags*, and values are tag handler functions that receive one
    137 argument: *the tagged element*, and specify how to interpret it."
    138   (parseclj-parser #'parseedn-reduce-leaf
    139                    #'parseedn-reduce-branch
    140                    (list (cons :tag-readers tag-readers))))
    141 
    142 (defun parseedn-read-str (s &optional tag-readers)
    143   "Parse string S as EDN.
    144 Returns an Emacs Lisp value.
    145 
    146 TAG-READERS is an optional association list.  For more information, see
    147 `parseedn-read'."
    148   (with-temp-buffer
    149     (insert s)
    150     (goto-char 1)
    151     (car (parseedn-read tag-readers))))
    152 
    153 
    154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    155 ;; Printer
    156 
    157 (defun parseedn-print-seq (coll)
    158   "Insert sequence COLL as EDN into the current buffer."
    159   (parseedn-print (elt coll 0))
    160   (let ((next (seq-drop coll 1)))
    161     (when (not (seq-empty-p next))
    162       (insert " ")
    163       (parseedn-print-seq next))))
    164 
    165 (defun parseedn-print-hash-or-alist (map &optional ks)
    166   "Insert hash table MAP or elisp alist as an EDN map into the current buffer."
    167   (let ((alist? (listp map))
    168         (keys (or ks (map-keys map))))
    169     (parseedn-print (car keys))
    170     (insert " ")
    171     (parseedn-print (map-elt map (car keys)))
    172     (let ((next (cdr keys)))
    173       (when (not (seq-empty-p next))
    174         (insert ", ")
    175         (parseedn-print-hash-or-alist map next)))))
    176 
    177 (defun parseedn-print-plist (plist)
    178   "Insert an elisp property list as an EDN map into the current buffer."
    179   (parseedn-print (car plist))
    180   (insert " ")
    181   (parseedn-print (cadr plist))
    182   (let ((next (cddr plist)))
    183     (when (not (seq-empty-p next))
    184       (insert ", ")
    185       (parseedn-print-plist next))))
    186 
    187 (defun parseedn-print-inst (time)
    188   "Insert an inst value into the current buffer.
    189 
    190 Take an encode-time style value and print it as a timestamp
    191 deliniated by double quotes."
    192   (insert (format-time-string "\"%Y-%m-%dT%T\"" time)))
    193 
    194 (defun parseedn-alist-p (list)
    195   "Non-null if and only if LIST is an alist with simple keys."
    196   (while (consp list)
    197     (setq list (if (and (consp (car list))
    198                         (atom (caar list)))
    199                    (cdr list)
    200                  'not-alist)))
    201   (null list))
    202 
    203 (defun parseedn-plist-p (list)
    204   "Non-null if and only if LIST is a plist with keyword keys."
    205   (while (consp list)
    206     (setq list (if (and (keywordp (car list))
    207                         (consp (cdr list)))
    208                    (cddr list)
    209                  'not-plist)))
    210   (null list))
    211 
    212 (defun parseedn-print (datum)
    213   "Insert DATUM as EDN into the current buffer.
    214 DATUM can be any Emacs Lisp value."
    215   (cond
    216    ((or (null datum) (numberp datum))
    217     (prin1 datum (current-buffer)))
    218 
    219    ((stringp datum)
    220     (insert "\"")
    221     (seq-doseq (char datum)
    222       (insert (cond
    223                ((eq ?\t char) "\\t")
    224                ((eq ?\f char) "\\f")
    225                ((eq ?\" char) "\\\"")
    226                ((eq ?\r char) "\\r")
    227                ((eq ?\n char) "\\n")
    228                ((eq ?\\ char) "\\\\")
    229                (t (char-to-string char)))))
    230     (insert "\""))
    231 
    232    ((eq t datum)
    233     (insert "true"))
    234 
    235    ((or (keywordp datum) (symbolp datum))
    236     (insert (symbol-name datum)))
    237 
    238    ((vectorp datum) (insert "[") (parseedn-print-seq datum) (insert "]"))
    239 
    240    ((or (hash-table-p datum) (parseedn-alist-p datum))
    241     (insert "{")
    242     (parseedn-print-hash-or-alist datum)
    243     (insert "}"))
    244 
    245    ((parseedn-plist-p datum)
    246     (insert "{")
    247     (parseedn-print-plist datum)
    248     (insert "}"))
    249 
    250    ((consp datum)
    251     (cond
    252      ((not (listp (cdr datum))) ; dotted pair
    253       (error "Don't know how to print: %s" datum))
    254      ((eq 'edn-set (car datum))
    255       (insert "#{") (parseedn-print-seq (cadr datum)) (insert "}"))
    256      ((eq 'edn-uuid (car datum))
    257       (insert "#uuid ") (parseedn-print-seq (cdr datum)))
    258      ((eq 'edn-inst (car datum))
    259       (insert "#inst ") (parseedn-print-inst (cdr datum)))
    260      ((eq 'edn-tagged-literal (car datum))
    261       (insert "#" (symbol-name (cadr datum)) " ")
    262       (parseedn-print (caddr datum)))
    263      (t (insert "(") (parseedn-print-seq datum) (insert ")"))))
    264 
    265    (t (error "Don't know how to print: %s" datum))))
    266 
    267 (defun parseedn-print-str (datum)
    268   "Return a string containing DATUM as EDN.
    269 DATUM can be any Emacs Lisp value."
    270   (with-temp-buffer
    271     (parseedn-print datum)
    272     (buffer-substring-no-properties (point-min) (point-max))))
    273 
    274 (provide 'parseedn)
    275 
    276 ;;; parseedn.el ends here