dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

esxml.el (9564B)


      1 ;;; esxml.el --- Library for working with xml via esxml and sxml
      2 ;; Copyright (C) 2012
      3 
      4 ;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu>
      5 ;; Maintainer: Evan Izaksonas-Smith
      6 ;; Created: 15th August 2012
      7 ;; Version: 0.3.7
      8 ;; Keywords: tools, lisp, comm
      9 ;; Description: A library for easily generating XML/XHTML in elisp
     10 ;;
     11 ;; This program is free software; you can redistribute it and/or modify
     12 ;; it under the terms of the GNU General Public License as published by
     13 ;; the Free Software Foundation, either version 3 of the License, or
     14 ;; (at your option) any later version.
     15 
     16 ;; This program is distributed in the hope that it will be useful,
     17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     19 ;; GNU General Public License for more details.
     20 
     21 ;; You should have received a copy of the GNU General Public License
     22 ;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
     23 
     24 ;;; Commentary:
     25 
     26 ;; This is XML/XHTML done with S-Expressions in EmacsLisp.  Simply,
     27 ;; this is the easiest way to write HTML or XML in Lisp.
     28 ;;
     29 ;; This library uses the native form of XML representation as used by
     30 ;; many libraries already included within emacs.  This representation
     31 ;; will be referred to as "esxml" throughout this library.  See
     32 ;; `esxml-to-xml' for a concise description of the format.
     33 ;;
     34 ;; This library is not intended to be used directly by a user, though
     35 ;; it certainly could be.  It could be used to generate static html,
     36 ;; or use a library like `elnode' to serve dynamic pages.  Or even to
     37 ;; extract a form from a site to produce an API.
     38 ;;
     39 ;; TODO: Better documentation, more convenience.
     40 ;;
     41 ;; NOTICE: Code base will be transitioning to using pcase instead of
     42 ;; destructuring bind wherever possible.  If this leads to hard to
     43 ;; debug code, please let me know, and I will do whatever I can to
     44 ;; resolve these issues.
     45 ;;
     46 ;;; Code:
     47 (require 'cl-lib)
     48 (require 'xml)
     49 (require 'pcase)
     50 
     51 (defun string-trim-whitespace (string)
     52   "A simple function, strips the whitespace from beginning and
     53 end of the string.  Leaves all other whitespace untouched."
     54   (replace-regexp-in-string
     55    (rx string-start (* whitespace)
     56        (group (+? anything))
     57        (* whitespace) string-end)
     58    "\\1"
     59    string))
     60 
     61 (defun esxml-trim-ws (esxml)
     62   "This may cause problems, is intended for parsing xml into sxml
     63 but may eroneously delete desirable white space."
     64   (if (stringp esxml) (string-trim-whitespace esxml)
     65     (pcase-let ((`(,tag ,attrs . ,body) esxml))
     66       `(,tag ,attrs
     67              ,@(mapcar 'esxml-trim-ws body)))))
     68 
     69 (defun attrp (attr)
     70   "Returns t if attr is a an esxml attribute.
     71 An esxml attribute is a cons of the form (symbol . string)"
     72  (and (consp attr)
     73        (symbolp (car attr))
     74        (stringp (cdr attr))))
     75 
     76 (defun esxml--convert-pair (attr)
     77   "Converts from cons cell to attribute pair.  Not intended for
     78 general use."
     79   (pcase-let ((`(,car . ,cdr) attr))
     80     (cl-check-type cdr string)
     81     (concat (symbol-name car)
     82             "="
     83             (prin1-to-string cdr))))
     84 
     85 (defun attrsp (attrs)
     86     "Returns t if attrs is a list of esxml attributes.
     87 
     88 See: `attrp'"
     89   (and (listp attrs)
     90        (cl-every (lambda (attr)
     91                    (and (consp attr)
     92                         (symbolp (car attr))
     93                         (stringp (cdr attr))))
     94                  attrs)))
     95 
     96 (defun esxml-validate-form (esxml)
     97   "A fast esxml validator.  Will error on invalid subparts making
     98 it suitable for hindsight testing."
     99   (cond ((stringp esxml) nil)
    100         ((< (length esxml) 2)
    101          (error "%s is too short to be a valid esxml expression" esxml))
    102         (t (pcase-let ((`(,tag ,attrs . ,body) esxml))
    103              (cl-check-type tag symbol)
    104              (cl-check-type attrs attrs)
    105              (mapcar 'esxml-validate-form body)))))
    106 
    107 ;; While the following could certainly have been written using format,
    108 ;; concat makes them easier to read.  Update later if neccesary for
    109 ;; efficiency.
    110 
    111 ;; Though at first glance the recursive nature of this function might
    112 ;; give one pause, since xml is a recursive data type, a recursive
    113 ;; parser is an optimal strategy.  each node will be visited exactly
    114 ;; once during the transformation.
    115 ;;
    116 ;; Further, since a string is a terminal node and since xml can be
    117 ;; represented as a string, non dynamic portions of the page may be
    118 ;; precached quite easily.
    119 (defun esxml--to-xml-recursive (esxml)
    120   (pcase esxml
    121     ((pred stringp)
    122      esxml)
    123     (`(comment nil ,body)
    124      (concat "<!--" body "-->"))
    125     (`(,tag ,attrs . ,body)
    126      ;; code goes here to catch invalid data.
    127      (concat "<" (symbol-name tag)
    128              (when attrs
    129                (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
    130              (if body
    131                  (concat ">" (mapconcat 'esxml--to-xml-recursive body "")
    132                          "</" (symbol-name tag) ">")
    133                "/>")))))
    134 
    135 (defun esxml-to-xml (esxml)
    136   "This translates an esxml expression, i.e. that which is
    137 returned by xml-parse-region.  The structure is defined as a
    138 string or a list where the first element is the tag the second is
    139 an alist of attribute value pairs and the remainder of the list
    140 is 0 or more esxml elements.
    141 
    142  (TAG ATTRS &rest BODY) || STRING
    143 
    144 TAG: is the tag and must be a symbol.
    145 
    146 ATTRS: is an alist of attribute pairs each pair must be of the
    147        form (KEY . VALUE).
    148 
    149 KEY: is the name of the attribute and must be a symbol.
    150 
    151 VALUE: is the value of the attribute and must be a string.
    152 
    153 BODY: is zero or more esxml expressions.  Having no body forms
    154       implies that the tag should be self closed.  If there is
    155       one or more body forms the tag will always be explicitly
    156       closed, even if they are the empty string.
    157 
    158 STRING: if the esxml expression is a string it is returned
    159         unchanged, this allows for caching of any constant parts,
    160         such as headers and footers.
    161 "
    162   (condition-case nil
    163       (esxml--to-xml-recursive esxml)
    164     (error (esxml-validate-form esxml))))
    165 
    166 (defun pp-esxml-to-xml (esxml)
    167   "This translates an esxml expresion as `esxml-to-xml' but
    168 indents it for ease of human readability, it is neccesarrily
    169 slower and will produce longer output."
    170   (pcase esxml
    171     ((pred stringp)
    172      esxml)
    173     (`(comment nil ,body)
    174      (concat "<!--" body "-->"))
    175     (`(,tag ,attrs . ,body)
    176      (cl-check-type tag symbol)
    177      (cl-check-type attrs attrs)
    178      (concat "<" (symbol-name tag)
    179              (when attrs
    180                (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
    181              (if body
    182                  (concat ">" (if (cl-every 'stringp body)
    183                                  (mapconcat 'identity body " ")
    184                                (concat "\n"
    185                                        (replace-regexp-in-string
    186                                         "^" "  "
    187                                         (mapconcat 'pp-esxml-to-xml body "\n"))
    188                                        "\n"))
    189                          "</" (symbol-name tag) ">")
    190                "/>")))
    191     (_
    192      (error "%s is not a valid esxml expression" esxml))))
    193 
    194 (defun sxml-to-esxml (sxml)
    195   "Translates sxml to esxml so the common standard can be used.
    196 See: http://okmij.org/ftp/Scheme/SXML.html."
    197   (pcase sxml
    198     (`(,tag (@ . ,attrs) . ,body)
    199      `(,tag ,(mapcar (lambda (attr)
    200                        (cons (car attr)
    201                              (or (cadr attr)
    202                                  (prin1-to-string (car attr)))))
    203                      attrs)
    204             ,@(mapcar 'sxml-to-esxml body)))
    205     (`(,tag . ,body)
    206      `(,tag nil
    207             ,@(mapcar 'sxml-to-esxml body)))
    208     ((and sxml (pred stringp)) sxml)))
    209 
    210 (defun sxml-to-xml (sxml)
    211   "Translates sxml to xml, via esxml, hey it's only a constant
    212 factor. :)"
    213   (esxml-to-xml (sxml-to-esxml sxml)))
    214 
    215 
    216 
    217 ;; TODO: make agnostic with respect to libxml vs xml.el
    218 (defun xml-to-esxml (string &optional trim)
    219   (with-temp-buffer
    220     (insert string)
    221     (let ((parse-tree (libxml-parse-xml-region (point-min)
    222                                                (point-max))))
    223       (if trim
    224           (esxml-trim-ws parse-tree)
    225         parse-tree))))
    226 
    227 ;; TODO, move to esxpath when mature
    228 (defun esxml-get-by-key (esxml key value)
    229   "Returns a list of all elements whose wttribute KEY match
    230 VALUE.  KEY should be a symbol, and VALUE should be a string.
    231 Will not recurse below a match."
    232   (unless (stringp esxml)
    233     (pcase-let ((`(,tag ,attrs . ,body) esxml))
    234       (if (equal value
    235                  (assoc-default key attrs))
    236           (list esxml)
    237         (apply 'append (mapcar (lambda (sexp)
    238                                  (esxml-get-by-key sexp key value))
    239                                body))))))
    240 
    241 (defun esxml-get-tags (esxml tags)
    242   "Returns a list of all elements whose tag is a member of TAGS.
    243 TAGS should be a list of tags to be matched against. Will not
    244 recurse below a match."
    245   (unless (stringp esxml)
    246     (pcase-let ((`(,tag ,attrs . ,body) esxml))
    247       (if (member tag tags)
    248           (list esxml)
    249         (apply 'append (mapcar (lambda (sexp)
    250                                  (esxml-get-tags sexp tags))
    251                                body))))))
    252 
    253 (defun esxml-get-forms (esxml)
    254   "Returns a list of all forms."
    255   (esxml-get-tags esxml '(form)))
    256 
    257 ;; taken from kv
    258 (defmacro esxml-destructuring-mapcar (args sexp seq)
    259   (declare (indent 2))
    260   (let ((entry (make-symbol "entry")))
    261     `(mapcar (lambda (,entry)
    262                (cl-destructuring-bind ,args ,entry ,sexp))
    263              ,seq)))
    264 
    265 (provide 'esxml)
    266 ;;; esxml.el ends here