dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

esxml.el (9605B)


      1 ;;; esxml.el --- Library for working with xml via esxml and sxml
      2 ;; Copyright (C) 2012
      3 
      4 ;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu>
      5 ;; Maintainer: Evan Izaksonas-Smith
      6 ;; URL: https://github.com/tali713/esxml
      7 ;; Created: 15th August 2012
      8 ;; Version: 0.3.7
      9 ;; Keywords: tools, lisp, comm
     10 ;; Description: A library for easily generating XML/XHTML in elisp
     11 ;;
     12 ;; This program is free software; you can redistribute it and/or modify
     13 ;; it under the terms of the GNU General Public License as published by
     14 ;; the Free Software Foundation, either version 3 of the License, or
     15 ;; (at your option) any later version.
     16 
     17 ;; This program is distributed in the hope that it will be useful,
     18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     20 ;; GNU General Public License for more details.
     21 
     22 ;; You should have received a copy of the GNU General Public License
     23 ;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
     24 
     25 ;;; Commentary:
     26 
     27 ;; This is XML/XHTML done with S-Expressions in EmacsLisp.  Simply,
     28 ;; this is the easiest way to write HTML or XML in Lisp.
     29 ;;
     30 ;; This library uses the native form of XML representation as used by
     31 ;; many libraries already included within emacs.  This representation
     32 ;; will be referred to as "esxml" throughout this library.  See
     33 ;; `esxml-to-xml' for a concise description of the format.
     34 ;;
     35 ;; This library is not intended to be used directly by a user, though
     36 ;; it certainly could be.  It could be used to generate static html,
     37 ;; or use a library like `elnode' to serve dynamic pages.  Or even to
     38 ;; extract a form from a site to produce an API.
     39 ;;
     40 ;; TODO: Better documentation, more convenience.
     41 ;;
     42 ;; NOTICE: Code base will be transitioning to using pcase instead of
     43 ;; destructuring bind wherever possible.  If this leads to hard to
     44 ;; debug code, please let me know, and I will do whatever I can to
     45 ;; resolve these issues.
     46 ;;
     47 ;;; Code:
     48 (require 'cl-lib)
     49 (require 'xml)
     50 (require 'pcase)
     51 
     52 (defun string-trim-whitespace (string)
     53   "A simple function, strips the whitespace from beginning and
     54 end of the string.  Leaves all other whitespace untouched."
     55   (replace-regexp-in-string
     56    (rx string-start (* whitespace)
     57        (group (+? anything))
     58        (* whitespace) string-end)
     59    "\\1"
     60    string))
     61 
     62 (defun esxml-trim-ws (esxml)
     63   "This may cause problems, is intended for parsing xml into sxml
     64 but may eroneously delete desirable white space."
     65   (if (stringp esxml) (string-trim-whitespace esxml)
     66     (pcase-let ((`(,tag ,attrs . ,body) esxml))
     67       `(,tag ,attrs
     68              ,@(mapcar 'esxml-trim-ws body)))))
     69 
     70 (defun attrp (attr)
     71   "Returns t if attr is a an esxml attribute.
     72 An esxml attribute is a cons of the form (symbol . string)"
     73  (and (consp attr)
     74        (symbolp (car attr))
     75        (stringp (cdr attr))))
     76 
     77 (defun esxml--convert-pair (attr)
     78   "Converts from cons cell to attribute pair.  Not intended for
     79 general use."
     80   (pcase-let ((`(,car . ,cdr) attr))
     81     (cl-check-type cdr string)
     82     (concat (symbol-name car)
     83             "="
     84             (prin1-to-string cdr))))
     85 
     86 (defun attrsp (attrs)
     87     "Returns t if attrs is a list of esxml attributes.
     88 
     89 See: `attrp'"
     90   (and (listp attrs)
     91        (cl-every (lambda (attr)
     92                    (and (consp attr)
     93                         (symbolp (car attr))
     94                         (stringp (cdr attr))))
     95                  attrs)))
     96 
     97 (defun esxml-validate-form (esxml)
     98   "A fast esxml validator.  Will error on invalid subparts making
     99 it suitable for hindsight testing."
    100   (cond ((stringp esxml) nil)
    101         ((< (length esxml) 2)
    102          (error "%s is too short to be a valid esxml expression" esxml))
    103         (t (pcase-let ((`(,tag ,attrs . ,body) esxml))
    104              (cl-check-type tag symbol)
    105              (cl-check-type attrs attrs)
    106              (mapcar 'esxml-validate-form body)))))
    107 
    108 ;; While the following could certainly have been written using format,
    109 ;; concat makes them easier to read.  Update later if neccesary for
    110 ;; efficiency.
    111 
    112 ;; Though at first glance the recursive nature of this function might
    113 ;; give one pause, since xml is a recursive data type, a recursive
    114 ;; parser is an optimal strategy.  each node will be visited exactly
    115 ;; once during the transformation.
    116 ;;
    117 ;; Further, since a string is a terminal node and since xml can be
    118 ;; represented as a string, non dynamic portions of the page may be
    119 ;; precached quite easily.
    120 (defun esxml--to-xml-recursive (esxml)
    121   (pcase esxml
    122     ((pred stringp)
    123      esxml)
    124     (`(comment nil ,body)
    125      (concat "<!--" body "-->"))
    126     (`(,tag ,attrs . ,body)
    127      ;; code goes here to catch invalid data.
    128      (concat "<" (symbol-name tag)
    129              (when attrs
    130                (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
    131              (if body
    132                  (concat ">" (mapconcat 'esxml--to-xml-recursive body "")
    133                          "</" (symbol-name tag) ">")
    134                "/>")))))
    135 
    136 (defun esxml-to-xml (esxml)
    137   "This translates an esxml expression, i.e. that which is
    138 returned by xml-parse-region.  The structure is defined as a
    139 string or a list where the first element is the tag the second is
    140 an alist of attribute value pairs and the remainder of the list
    141 is 0 or more esxml elements.
    142 
    143  (TAG ATTRS &rest BODY) || STRING
    144 
    145 TAG: is the tag and must be a symbol.
    146 
    147 ATTRS: is an alist of attribute pairs each pair must be of the
    148        form (KEY . VALUE).
    149 
    150 KEY: is the name of the attribute and must be a symbol.
    151 
    152 VALUE: is the value of the attribute and must be a string.
    153 
    154 BODY: is zero or more esxml expressions.  Having no body forms
    155       implies that the tag should be self closed.  If there is
    156       one or more body forms the tag will always be explicitly
    157       closed, even if they are the empty string.
    158 
    159 STRING: if the esxml expression is a string it is returned
    160         unchanged, this allows for caching of any constant parts,
    161         such as headers and footers.
    162 "
    163   (condition-case nil
    164       (esxml--to-xml-recursive esxml)
    165     (error (esxml-validate-form esxml))))
    166 
    167 (defun pp-esxml-to-xml (esxml)
    168   "This translates an esxml expresion as `esxml-to-xml' but
    169 indents it for ease of human readability, it is neccesarrily
    170 slower and will produce longer output."
    171   (pcase esxml
    172     ((pred stringp)
    173      esxml)
    174     (`(comment nil ,body)
    175      (concat "<!--" body "-->"))
    176     (`(,tag ,attrs . ,body)
    177      (cl-check-type tag symbol)
    178      (cl-check-type attrs attrs)
    179      (concat "<" (symbol-name tag)
    180              (when attrs
    181                (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
    182              (if body
    183                  (concat ">" (if (cl-every 'stringp body)
    184                                  (mapconcat 'identity body " ")
    185                                (concat "\n"
    186                                        (replace-regexp-in-string
    187                                         "^" "  "
    188                                         (mapconcat 'pp-esxml-to-xml body "\n"))
    189                                        "\n"))
    190                          "</" (symbol-name tag) ">")
    191                "/>")))
    192     (_
    193      (error "%s is not a valid esxml expression" esxml))))
    194 
    195 (defun sxml-to-esxml (sxml)
    196   "Translates sxml to esxml so the common standard can be used.
    197 See: http://okmij.org/ftp/Scheme/SXML.html."
    198   (pcase sxml
    199     (`(,tag (@ . ,attrs) . ,body)
    200      `(,tag ,(mapcar (lambda (attr)
    201                        (cons (car attr)
    202                              (or (cadr attr)
    203                                  (prin1-to-string (car attr)))))
    204                      attrs)
    205             ,@(mapcar 'sxml-to-esxml body)))
    206     (`(,tag . ,body)
    207      `(,tag nil
    208             ,@(mapcar 'sxml-to-esxml body)))
    209     ((and sxml (pred stringp)) sxml)))
    210 
    211 (defun sxml-to-xml (sxml)
    212   "Translates sxml to xml, via esxml, hey it's only a constant
    213 factor. :)"
    214   (esxml-to-xml (sxml-to-esxml sxml)))
    215 
    216 
    217 
    218 ;; TODO: make agnostic with respect to libxml vs xml.el
    219 (defun xml-to-esxml (string &optional trim)
    220   (with-temp-buffer
    221     (insert string)
    222     (let ((parse-tree (libxml-parse-xml-region (point-min)
    223                                                (point-max))))
    224       (if trim
    225           (esxml-trim-ws parse-tree)
    226         parse-tree))))
    227 
    228 ;; TODO, move to esxpath when mature
    229 (defun esxml-get-by-key (esxml key value)
    230   "Returns a list of all elements whose wttribute KEY match
    231 VALUE.  KEY should be a symbol, and VALUE should be a string.
    232 Will not recurse below a match."
    233   (unless (stringp esxml)
    234     (pcase-let ((`(,tag ,attrs . ,body) esxml))
    235       (if (equal value
    236                  (assoc-default key attrs))
    237           (list esxml)
    238         (apply 'append (mapcar (lambda (sexp)
    239                                  (esxml-get-by-key sexp key value))
    240                                body))))))
    241 
    242 (defun esxml-get-tags (esxml tags)
    243   "Returns a list of all elements whose tag is a member of TAGS.
    244 TAGS should be a list of tags to be matched against. Will not
    245 recurse below a match."
    246   (unless (stringp esxml)
    247     (pcase-let ((`(,tag ,attrs . ,body) esxml))
    248       (if (member tag tags)
    249           (list esxml)
    250         (apply 'append (mapcar (lambda (sexp)
    251                                  (esxml-get-tags sexp tags))
    252                                body))))))
    253 
    254 (defun esxml-get-forms (esxml)
    255   "Returns a list of all forms."
    256   (esxml-get-tags esxml '(form)))
    257 
    258 ;; taken from kv
    259 (defmacro esxml-destructuring-mapcar (args sexp seq)
    260   (declare (indent 2))
    261   (let ((entry (make-symbol "entry")))
    262     `(mapcar (lambda (,entry)
    263                (cl-destructuring-bind ,args ,entry ,sexp))
    264              ,seq)))
    265 
    266 (provide 'esxml)
    267 ;;; esxml.el ends here