esxml.el (9564B)
1 ;;; esxml.el --- Library for working with xml via esxml and sxml 2 ;; Copyright (C) 2012 3 4 ;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu> 5 ;; Maintainer: Evan Izaksonas-Smith 6 ;; Created: 15th August 2012 7 ;; Version: 0.3.7 8 ;; Keywords: tools, lisp, comm 9 ;; Description: A library for easily generating XML/XHTML in elisp 10 ;; 11 ;; This program is free software; you can redistribute it and/or modify 12 ;; it under the terms of the GNU General Public License as published by 13 ;; the Free Software Foundation, either version 3 of the License, or 14 ;; (at your option) any later version. 15 16 ;; This program is distributed in the hope that it will be useful, 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 ;; GNU General Public License for more details. 20 21 ;; You should have received a copy of the GNU General Public License 22 ;; along with this program. If not, see <http://www.gnu.org/licenses/>. 23 24 ;;; Commentary: 25 26 ;; This is XML/XHTML done with S-Expressions in EmacsLisp. Simply, 27 ;; this is the easiest way to write HTML or XML in Lisp. 28 ;; 29 ;; This library uses the native form of XML representation as used by 30 ;; many libraries already included within emacs. This representation 31 ;; will be referred to as "esxml" throughout this library. See 32 ;; `esxml-to-xml' for a concise description of the format. 33 ;; 34 ;; This library is not intended to be used directly by a user, though 35 ;; it certainly could be. It could be used to generate static html, 36 ;; or use a library like `elnode' to serve dynamic pages. Or even to 37 ;; extract a form from a site to produce an API. 38 ;; 39 ;; TODO: Better documentation, more convenience. 40 ;; 41 ;; NOTICE: Code base will be transitioning to using pcase instead of 42 ;; destructuring bind wherever possible. If this leads to hard to 43 ;; debug code, please let me know, and I will do whatever I can to 44 ;; resolve these issues. 45 ;; 46 ;;; Code: 47 (require 'cl-lib) 48 (require 'xml) 49 (require 'pcase) 50 51 (defun string-trim-whitespace (string) 52 "A simple function, strips the whitespace from beginning and 53 end of the string. Leaves all other whitespace untouched." 54 (replace-regexp-in-string 55 (rx string-start (* whitespace) 56 (group (+? anything)) 57 (* whitespace) string-end) 58 "\\1" 59 string)) 60 61 (defun esxml-trim-ws (esxml) 62 "This may cause problems, is intended for parsing xml into sxml 63 but may eroneously delete desirable white space." 64 (if (stringp esxml) (string-trim-whitespace esxml) 65 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 66 `(,tag ,attrs 67 ,@(mapcar 'esxml-trim-ws body))))) 68 69 (defun attrp (attr) 70 "Returns t if attr is a an esxml attribute. 71 An esxml attribute is a cons of the form (symbol . string)" 72 (and (consp attr) 73 (symbolp (car attr)) 74 (stringp (cdr attr)))) 75 76 (defun esxml--convert-pair (attr) 77 "Converts from cons cell to attribute pair. Not intended for 78 general use." 79 (pcase-let ((`(,car . ,cdr) attr)) 80 (cl-check-type cdr string) 81 (concat (symbol-name car) 82 "=" 83 (prin1-to-string cdr)))) 84 85 (defun attrsp (attrs) 86 "Returns t if attrs is a list of esxml attributes. 87 88 See: `attrp'" 89 (and (listp attrs) 90 (cl-every (lambda (attr) 91 (and (consp attr) 92 (symbolp (car attr)) 93 (stringp (cdr attr)))) 94 attrs))) 95 96 (defun esxml-validate-form (esxml) 97 "A fast esxml validator. Will error on invalid subparts making 98 it suitable for hindsight testing." 99 (cond ((stringp esxml) nil) 100 ((< (length esxml) 2) 101 (error "%s is too short to be a valid esxml expression" esxml)) 102 (t (pcase-let ((`(,tag ,attrs . ,body) esxml)) 103 (cl-check-type tag symbol) 104 (cl-check-type attrs attrs) 105 (mapcar 'esxml-validate-form body))))) 106 107 ;; While the following could certainly have been written using format, 108 ;; concat makes them easier to read. Update later if neccesary for 109 ;; efficiency. 110 111 ;; Though at first glance the recursive nature of this function might 112 ;; give one pause, since xml is a recursive data type, a recursive 113 ;; parser is an optimal strategy. each node will be visited exactly 114 ;; once during the transformation. 115 ;; 116 ;; Further, since a string is a terminal node and since xml can be 117 ;; represented as a string, non dynamic portions of the page may be 118 ;; precached quite easily. 119 (defun esxml--to-xml-recursive (esxml) 120 (pcase esxml 121 ((pred stringp) 122 esxml) 123 (`(comment nil ,body) 124 (concat "<!--" body "-->")) 125 (`(,tag ,attrs . ,body) 126 ;; code goes here to catch invalid data. 127 (concat "<" (symbol-name tag) 128 (when attrs 129 (concat " " (mapconcat 'esxml--convert-pair attrs " "))) 130 (if body 131 (concat ">" (mapconcat 'esxml--to-xml-recursive body "") 132 "</" (symbol-name tag) ">") 133 "/>"))))) 134 135 (defun esxml-to-xml (esxml) 136 "This translates an esxml expression, i.e. that which is 137 returned by xml-parse-region. The structure is defined as a 138 string or a list where the first element is the tag the second is 139 an alist of attribute value pairs and the remainder of the list 140 is 0 or more esxml elements. 141 142 (TAG ATTRS &rest BODY) || STRING 143 144 TAG: is the tag and must be a symbol. 145 146 ATTRS: is an alist of attribute pairs each pair must be of the 147 form (KEY . VALUE). 148 149 KEY: is the name of the attribute and must be a symbol. 150 151 VALUE: is the value of the attribute and must be a string. 152 153 BODY: is zero or more esxml expressions. Having no body forms 154 implies that the tag should be self closed. If there is 155 one or more body forms the tag will always be explicitly 156 closed, even if they are the empty string. 157 158 STRING: if the esxml expression is a string it is returned 159 unchanged, this allows for caching of any constant parts, 160 such as headers and footers. 161 " 162 (condition-case nil 163 (esxml--to-xml-recursive esxml) 164 (error (esxml-validate-form esxml)))) 165 166 (defun pp-esxml-to-xml (esxml) 167 "This translates an esxml expresion as `esxml-to-xml' but 168 indents it for ease of human readability, it is neccesarrily 169 slower and will produce longer output." 170 (pcase esxml 171 ((pred stringp) 172 esxml) 173 (`(comment nil ,body) 174 (concat "<!--" body "-->")) 175 (`(,tag ,attrs . ,body) 176 (cl-check-type tag symbol) 177 (cl-check-type attrs attrs) 178 (concat "<" (symbol-name tag) 179 (when attrs 180 (concat " " (mapconcat 'esxml--convert-pair attrs " "))) 181 (if body 182 (concat ">" (if (cl-every 'stringp body) 183 (mapconcat 'identity body " ") 184 (concat "\n" 185 (replace-regexp-in-string 186 "^" " " 187 (mapconcat 'pp-esxml-to-xml body "\n")) 188 "\n")) 189 "</" (symbol-name tag) ">") 190 "/>"))) 191 (_ 192 (error "%s is not a valid esxml expression" esxml)))) 193 194 (defun sxml-to-esxml (sxml) 195 "Translates sxml to esxml so the common standard can be used. 196 See: http://okmij.org/ftp/Scheme/SXML.html." 197 (pcase sxml 198 (`(,tag (@ . ,attrs) . ,body) 199 `(,tag ,(mapcar (lambda (attr) 200 (cons (car attr) 201 (or (cadr attr) 202 (prin1-to-string (car attr))))) 203 attrs) 204 ,@(mapcar 'sxml-to-esxml body))) 205 (`(,tag . ,body) 206 `(,tag nil 207 ,@(mapcar 'sxml-to-esxml body))) 208 ((and sxml (pred stringp)) sxml))) 209 210 (defun sxml-to-xml (sxml) 211 "Translates sxml to xml, via esxml, hey it's only a constant 212 factor. :)" 213 (esxml-to-xml (sxml-to-esxml sxml))) 214 215 216 217 ;; TODO: make agnostic with respect to libxml vs xml.el 218 (defun xml-to-esxml (string &optional trim) 219 (with-temp-buffer 220 (insert string) 221 (let ((parse-tree (libxml-parse-xml-region (point-min) 222 (point-max)))) 223 (if trim 224 (esxml-trim-ws parse-tree) 225 parse-tree)))) 226 227 ;; TODO, move to esxpath when mature 228 (defun esxml-get-by-key (esxml key value) 229 "Returns a list of all elements whose wttribute KEY match 230 VALUE. KEY should be a symbol, and VALUE should be a string. 231 Will not recurse below a match." 232 (unless (stringp esxml) 233 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 234 (if (equal value 235 (assoc-default key attrs)) 236 (list esxml) 237 (apply 'append (mapcar (lambda (sexp) 238 (esxml-get-by-key sexp key value)) 239 body)))))) 240 241 (defun esxml-get-tags (esxml tags) 242 "Returns a list of all elements whose tag is a member of TAGS. 243 TAGS should be a list of tags to be matched against. Will not 244 recurse below a match." 245 (unless (stringp esxml) 246 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 247 (if (member tag tags) 248 (list esxml) 249 (apply 'append (mapcar (lambda (sexp) 250 (esxml-get-tags sexp tags)) 251 body)))))) 252 253 (defun esxml-get-forms (esxml) 254 "Returns a list of all forms." 255 (esxml-get-tags esxml '(form))) 256 257 ;; taken from kv 258 (defmacro esxml-destructuring-mapcar (args sexp seq) 259 (declare (indent 2)) 260 (let ((entry (make-symbol "entry"))) 261 `(mapcar (lambda (,entry) 262 (cl-destructuring-bind ,args ,entry ,sexp)) 263 ,seq))) 264 265 (provide 'esxml) 266 ;;; esxml.el ends here