esxml.el (9605B)
1 ;;; esxml.el --- Library for working with xml via esxml and sxml 2 ;; Copyright (C) 2012 3 4 ;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu> 5 ;; Maintainer: Evan Izaksonas-Smith 6 ;; URL: https://github.com/tali713/esxml 7 ;; Created: 15th August 2012 8 ;; Version: 0.3.7 9 ;; Keywords: tools, lisp, comm 10 ;; Description: A library for easily generating XML/XHTML in elisp 11 ;; 12 ;; This program is free software; you can redistribute it and/or modify 13 ;; it under the terms of the GNU General Public License as published by 14 ;; the Free Software Foundation, either version 3 of the License, or 15 ;; (at your option) any later version. 16 17 ;; This program is distributed in the hope that it will be useful, 18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 ;; GNU General Public License for more details. 21 22 ;; You should have received a copy of the GNU General Public License 23 ;; along with this program. If not, see <http://www.gnu.org/licenses/>. 24 25 ;;; Commentary: 26 27 ;; This is XML/XHTML done with S-Expressions in EmacsLisp. Simply, 28 ;; this is the easiest way to write HTML or XML in Lisp. 29 ;; 30 ;; This library uses the native form of XML representation as used by 31 ;; many libraries already included within emacs. This representation 32 ;; will be referred to as "esxml" throughout this library. See 33 ;; `esxml-to-xml' for a concise description of the format. 34 ;; 35 ;; This library is not intended to be used directly by a user, though 36 ;; it certainly could be. It could be used to generate static html, 37 ;; or use a library like `elnode' to serve dynamic pages. Or even to 38 ;; extract a form from a site to produce an API. 39 ;; 40 ;; TODO: Better documentation, more convenience. 41 ;; 42 ;; NOTICE: Code base will be transitioning to using pcase instead of 43 ;; destructuring bind wherever possible. If this leads to hard to 44 ;; debug code, please let me know, and I will do whatever I can to 45 ;; resolve these issues. 46 ;; 47 ;;; Code: 48 (require 'cl-lib) 49 (require 'xml) 50 (require 'pcase) 51 52 (defun string-trim-whitespace (string) 53 "A simple function, strips the whitespace from beginning and 54 end of the string. Leaves all other whitespace untouched." 55 (replace-regexp-in-string 56 (rx string-start (* whitespace) 57 (group (+? anything)) 58 (* whitespace) string-end) 59 "\\1" 60 string)) 61 62 (defun esxml-trim-ws (esxml) 63 "This may cause problems, is intended for parsing xml into sxml 64 but may eroneously delete desirable white space." 65 (if (stringp esxml) (string-trim-whitespace esxml) 66 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 67 `(,tag ,attrs 68 ,@(mapcar 'esxml-trim-ws body))))) 69 70 (defun attrp (attr) 71 "Returns t if attr is a an esxml attribute. 72 An esxml attribute is a cons of the form (symbol . string)" 73 (and (consp attr) 74 (symbolp (car attr)) 75 (stringp (cdr attr)))) 76 77 (defun esxml--convert-pair (attr) 78 "Converts from cons cell to attribute pair. Not intended for 79 general use." 80 (pcase-let ((`(,car . ,cdr) attr)) 81 (cl-check-type cdr string) 82 (concat (symbol-name car) 83 "=" 84 (prin1-to-string cdr)))) 85 86 (defun attrsp (attrs) 87 "Returns t if attrs is a list of esxml attributes. 88 89 See: `attrp'" 90 (and (listp attrs) 91 (cl-every (lambda (attr) 92 (and (consp attr) 93 (symbolp (car attr)) 94 (stringp (cdr attr)))) 95 attrs))) 96 97 (defun esxml-validate-form (esxml) 98 "A fast esxml validator. Will error on invalid subparts making 99 it suitable for hindsight testing." 100 (cond ((stringp esxml) nil) 101 ((< (length esxml) 2) 102 (error "%s is too short to be a valid esxml expression" esxml)) 103 (t (pcase-let ((`(,tag ,attrs . ,body) esxml)) 104 (cl-check-type tag symbol) 105 (cl-check-type attrs attrs) 106 (mapcar 'esxml-validate-form body))))) 107 108 ;; While the following could certainly have been written using format, 109 ;; concat makes them easier to read. Update later if neccesary for 110 ;; efficiency. 111 112 ;; Though at first glance the recursive nature of this function might 113 ;; give one pause, since xml is a recursive data type, a recursive 114 ;; parser is an optimal strategy. each node will be visited exactly 115 ;; once during the transformation. 116 ;; 117 ;; Further, since a string is a terminal node and since xml can be 118 ;; represented as a string, non dynamic portions of the page may be 119 ;; precached quite easily. 120 (defun esxml--to-xml-recursive (esxml) 121 (pcase esxml 122 ((pred stringp) 123 esxml) 124 (`(comment nil ,body) 125 (concat "<!--" body "-->")) 126 (`(,tag ,attrs . ,body) 127 ;; code goes here to catch invalid data. 128 (concat "<" (symbol-name tag) 129 (when attrs 130 (concat " " (mapconcat 'esxml--convert-pair attrs " "))) 131 (if body 132 (concat ">" (mapconcat 'esxml--to-xml-recursive body "") 133 "</" (symbol-name tag) ">") 134 "/>"))))) 135 136 (defun esxml-to-xml (esxml) 137 "This translates an esxml expression, i.e. that which is 138 returned by xml-parse-region. The structure is defined as a 139 string or a list where the first element is the tag the second is 140 an alist of attribute value pairs and the remainder of the list 141 is 0 or more esxml elements. 142 143 (TAG ATTRS &rest BODY) || STRING 144 145 TAG: is the tag and must be a symbol. 146 147 ATTRS: is an alist of attribute pairs each pair must be of the 148 form (KEY . VALUE). 149 150 KEY: is the name of the attribute and must be a symbol. 151 152 VALUE: is the value of the attribute and must be a string. 153 154 BODY: is zero or more esxml expressions. Having no body forms 155 implies that the tag should be self closed. If there is 156 one or more body forms the tag will always be explicitly 157 closed, even if they are the empty string. 158 159 STRING: if the esxml expression is a string it is returned 160 unchanged, this allows for caching of any constant parts, 161 such as headers and footers. 162 " 163 (condition-case nil 164 (esxml--to-xml-recursive esxml) 165 (error (esxml-validate-form esxml)))) 166 167 (defun pp-esxml-to-xml (esxml) 168 "This translates an esxml expresion as `esxml-to-xml' but 169 indents it for ease of human readability, it is neccesarrily 170 slower and will produce longer output." 171 (pcase esxml 172 ((pred stringp) 173 esxml) 174 (`(comment nil ,body) 175 (concat "<!--" body "-->")) 176 (`(,tag ,attrs . ,body) 177 (cl-check-type tag symbol) 178 (cl-check-type attrs attrs) 179 (concat "<" (symbol-name tag) 180 (when attrs 181 (concat " " (mapconcat 'esxml--convert-pair attrs " "))) 182 (if body 183 (concat ">" (if (cl-every 'stringp body) 184 (mapconcat 'identity body " ") 185 (concat "\n" 186 (replace-regexp-in-string 187 "^" " " 188 (mapconcat 'pp-esxml-to-xml body "\n")) 189 "\n")) 190 "</" (symbol-name tag) ">") 191 "/>"))) 192 (_ 193 (error "%s is not a valid esxml expression" esxml)))) 194 195 (defun sxml-to-esxml (sxml) 196 "Translates sxml to esxml so the common standard can be used. 197 See: http://okmij.org/ftp/Scheme/SXML.html." 198 (pcase sxml 199 (`(,tag (@ . ,attrs) . ,body) 200 `(,tag ,(mapcar (lambda (attr) 201 (cons (car attr) 202 (or (cadr attr) 203 (prin1-to-string (car attr))))) 204 attrs) 205 ,@(mapcar 'sxml-to-esxml body))) 206 (`(,tag . ,body) 207 `(,tag nil 208 ,@(mapcar 'sxml-to-esxml body))) 209 ((and sxml (pred stringp)) sxml))) 210 211 (defun sxml-to-xml (sxml) 212 "Translates sxml to xml, via esxml, hey it's only a constant 213 factor. :)" 214 (esxml-to-xml (sxml-to-esxml sxml))) 215 216 217 218 ;; TODO: make agnostic with respect to libxml vs xml.el 219 (defun xml-to-esxml (string &optional trim) 220 (with-temp-buffer 221 (insert string) 222 (let ((parse-tree (libxml-parse-xml-region (point-min) 223 (point-max)))) 224 (if trim 225 (esxml-trim-ws parse-tree) 226 parse-tree)))) 227 228 ;; TODO, move to esxpath when mature 229 (defun esxml-get-by-key (esxml key value) 230 "Returns a list of all elements whose wttribute KEY match 231 VALUE. KEY should be a symbol, and VALUE should be a string. 232 Will not recurse below a match." 233 (unless (stringp esxml) 234 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 235 (if (equal value 236 (assoc-default key attrs)) 237 (list esxml) 238 (apply 'append (mapcar (lambda (sexp) 239 (esxml-get-by-key sexp key value)) 240 body)))))) 241 242 (defun esxml-get-tags (esxml tags) 243 "Returns a list of all elements whose tag is a member of TAGS. 244 TAGS should be a list of tags to be matched against. Will not 245 recurse below a match." 246 (unless (stringp esxml) 247 (pcase-let ((`(,tag ,attrs . ,body) esxml)) 248 (if (member tag tags) 249 (list esxml) 250 (apply 'append (mapcar (lambda (sexp) 251 (esxml-get-tags sexp tags)) 252 body)))))) 253 254 (defun esxml-get-forms (esxml) 255 "Returns a list of all forms." 256 (esxml-get-tags esxml '(form))) 257 258 ;; taken from kv 259 (defmacro esxml-destructuring-mapcar (args sexp seq) 260 (declare (indent 2)) 261 (let ((entry (make-symbol "entry"))) 262 `(mapcar (lambda (,entry) 263 (cl-destructuring-bind ,args ,entry ,sexp)) 264 ,seq))) 265 266 (provide 'esxml) 267 ;;; esxml.el ends here