elfeed-curl.el (22441B)
1 ;;; elfeed-curl.el --- curl backend for Elfeed -*- lexical-binding: t; -*- 2 3 ;;; Comments: 4 5 ;; An alternative to `url-retrieve' and `url-queue' that fetches URLs 6 ;; using the curl command line program. 7 8 ;; The API is three functions: 9 10 ;; * `elfeed-curl-retrieve' 11 ;; * `elfeed-curl-retrieve-synchronously' 12 ;; * `elfeed-curl-enqueue' 13 14 ;; And has four buffer-local variables for use in callbacks: 15 16 ;; * `elfeed-curl-headers' 17 ;; * `elfeed-curl-status-code' 18 ;; * `elfeed-curl-error-message' 19 ;; * `elfeed-curl-location' 20 21 ;; The buffer delivered to callbacks may contain multiple requests. It 22 ;; will be narrowed to the specific content for the current request. 23 ;; It's vitally important that callbacks do not kill the buffer 24 ;; because it may be needed for other callbacks. It also means the 25 ;; buffer won't necessarily be around when the callback returns. 26 ;; Callbacks should also avoid editing the buffer, though this 27 ;; generally shouldn't impact other requests. 28 29 ;; Sometimes Elfeed asks curl to retrieve multiple requests and 30 ;; deliver them concatenated. Due to the possibility of HTTP/1.0 being 31 ;; involved — and other ambiguous-length protocols — there's no 32 ;; perfectly unambiguous way to split the output. To work around this, 33 ;; I use curl's --write-out to insert a randomly-generated token after 34 ;; each request. It's highly unlikely (1 in ~1e38) that this token 35 ;; will appear in content, so I can use it to identify the end of each 36 ;; request. 37 38 ;;; Code: 39 40 (require 'url) 41 (require 'cl-lib) 42 (require 'elfeed-lib) 43 (require 'elfeed-log) 44 45 (defcustom elfeed-curl-program-name "curl" 46 "Name/path by which to invoke the curl program." 47 :group 'elfeed 48 :type 'string) 49 50 (defcustom elfeed-curl-max-connections 16 51 "Maximum number of concurrent fetches." 52 :group 'elfeed 53 :type 'integer) 54 55 (defcustom elfeed-curl-timeout 30 56 "Maximum number of seconds a fetch is allowed to take once started." 57 :group 'elfeed 58 :type 'integer) 59 60 (defcustom elfeed-curl-extra-arguments () 61 "A list of additional arguments to pass to cURL. 62 These extra arguments are appended after Elfeed's own arguments, 63 and care must be taken to not interfere with Elfeed's needs. The 64 guideline is to avoid arguments that change anything about cURL's 65 output format." 66 :group 'elfeed 67 :type '(repeat string)) 68 69 (defvar elfeed-curl-queue () 70 "List of pending curl requests.") 71 72 (defvar elfeed-curl-queue-active 0 73 "Number of concurrent requests currently active.") 74 75 (defvar-local elfeed-curl-headers nil 76 "Alist of HTTP response headers.") 77 78 (defvar-local elfeed-curl-status-code nil 79 "Numeric HTTP response code, nil for non-HTTP protocols.") 80 81 (defvar-local elfeed-curl-error-message nil 82 "Human-friendly message describing the error.") 83 84 (defvar-local elfeed-curl-location nil 85 "Actual URL fetched (after any redirects).") 86 87 (defvar-local elfeed-curl--regions () 88 "List of markers bounding separate requests.") 89 90 (defvar-local elfeed-curl--requests () 91 "List of URL / callback pairs for the current buffer.") 92 93 (defvar-local elfeed-curl--token nil 94 "Unique token that splits requests.") 95 96 (defvar-local elfeed-curl--refcount nil 97 "Number of callbacks waiting on the current buffer.") 98 99 (defvar elfeed-curl--error-codes 100 '((1 . "Unsupported protocol.") 101 (2 . "Failed to initialize.") 102 (3 . "URL malformed. The syntax was not correct.") 103 (4 . "A feature or option that was needed to perform the desired request was not enabled or was explicitly disabled at build-time.") 104 (5 . "Couldn't resolve proxy. The given proxy host could not be resolved.") 105 (6 . "Couldn't resolve host. The given remote host was not resolved.") 106 (7 . "Failed to connect to host.") 107 (8 . "FTP weird server reply. The server sent data curl couldn't parse.") 108 (9 . "FTP access denied.") 109 (11 . "FTP weird PASS reply.") 110 (13 . "FTP weird PASV reply.") 111 (14 . "FTP weird 227 format.") 112 (15 . "FTP can't get host.") 113 (16 . "A problem was detected in the HTTP2 framing layer.") 114 (17 . "FTP couldn't set binary.") 115 (18 . "Partial file. Only a part of the file was transferred.") 116 (19 . "FTP couldn't download/access the given file, the RETR (or similar) command failed.") 117 (21 . "FTP quote error. A quote command returned error from the server.") 118 (22 . "HTTP page not retrieved.") 119 (23 . "Write error.") 120 (25 . "FTP couldn't STOR file.") 121 (26 . "Read error. Various reading problems.") 122 (27 . "Out of memory. A memory allocation request failed.") 123 (28 . "Operation timeout.") 124 (30 . "FTP PORT failed.") 125 (31 . "FTP couldn't use REST.") 126 (33 . "HTTP range error. The range \"command\" didn't work.") 127 (34 . "HTTP post error. Internal post-request generation error.") 128 (35 . "SSL connect error. The SSL handshaking failed.") 129 (36 . "FTP bad download resume.") 130 (37 . "FILE couldn't read file.") 131 (38 . "LDAP bind operation failed.") 132 (39 . "LDAP search failed.") 133 (41 . "Function not found. A required LDAP function was not found.") 134 (42 . "Aborted by callback.") 135 (43 . "Internal error. A function was called with a bad parameter.") 136 (45 . "Interface error. A specified outgoing interface could not be used.") 137 (47 . "Too many redirects.") 138 (48 . "Unknown option specified to libcurl.") 139 (49 . "Malformed telnet option.") 140 (51 . "The peer's SSL certificate or SSH MD5 fingerprint was not OK.") 141 (52 . "The server didn't reply anything, which here is considered an error.") 142 (53 . "SSL crypto engine not found.") 143 (54 . "Cannot set SSL crypto engine as default.") 144 (55 . "Failed sending network data.") 145 (56 . "Failure in receiving network data.") 146 (58 . "Problem with the local certificate.") 147 (59 . "Couldn't use specified SSL cipher.") 148 (60 . "Peer certificate cannot be authenticated with known CA certificates.") 149 (61 . "Unrecognized transfer encoding.") 150 (62 . "Invalid LDAP URL.") 151 (63 . "Maximum file size exceeded.") 152 (64 . "Requested FTP SSL level failed.") 153 (65 . "Sending the data requires a rewind that failed.") 154 (66 . "Failed to initialise SSL Engine.") 155 (67 . "The user name, password, or similar was not accepted and curl failed to log in.") 156 (68 . "File not found on TFTP server.") 157 (69 . "Permission problem on TFTP server.") 158 (70 . "Out of disk space on TFTP server.") 159 (71 . "Illegal TFTP operation.") 160 (72 . "Unknown TFTP transfer ID.") 161 (73 . "File already exists (TFTP).") 162 (74 . "No such user (TFTP).") 163 (75 . "Character conversion failed.") 164 (76 . "Character conversion functions required.") 165 (77 . "Problem with reading the SSL CA cert (path? access rights?).") 166 (78 . "The resource referenced in the URL does not exist.") 167 (79 . "An unspecified error occurred during the SSH session.") 168 (80 . "Failed to shut down the SSL connection.") 169 (82 . "Could not load CRL file, missing or wrong format (added in 7.19.0).") 170 (83 . "Issuer check failed (added in 7.19.0).") 171 (84 . "The FTP PRET command failed") 172 (85 . "RTSP: mismatch of CSeq numbers") 173 (86 . "RTSP: mismatch of Session Identifiers") 174 (87 . "unable to parse FTP file list") 175 (88 . "FTP chunk callback reported error") 176 (89 . "No connection available, the session will be queued") 177 (90 . "SSL public key does not matched pinned public key"))) 178 179 (defvar elfeed-curl--capabilities-cache 180 (make-hash-table :test 'eq :weakness 'key) 181 "Used to avoid invoking curl more than once for version info.") 182 183 (defun elfeed-curl-get-capabilities () 184 "Return capabilities plist for the curl at `elfeed-curl-program-name'. 185 :version -- cURL's version string 186 :compression -- non-nil if --compressed is supported 187 :protocols -- symbol list of supported protocols 188 :features -- string list of supported features" 189 (let* ((cache elfeed-curl--capabilities-cache) 190 (cache-value (gethash elfeed-curl-program-name cache))) 191 (if cache-value 192 cache-value 193 (with-temp-buffer 194 (call-process elfeed-curl-program-name nil t nil "--version") 195 (let ((version 196 (progn 197 (setf (point) (point-min)) 198 (when (re-search-forward "[.0-9]+" nil t) 199 (match-string 0)))) 200 (protocols 201 (progn 202 (setf (point) (point-min)) 203 (when (re-search-forward "^Protocols: \\(.*\\)$" nil t) 204 (mapcar #'intern (split-string (match-string 1)))))) 205 (features 206 (progn 207 (setf (point) (point-min)) 208 (when (re-search-forward "^Features: \\(.*\\)$") 209 (split-string (match-string 1)))))) 210 (setf (gethash elfeed-curl-program-name cache) 211 (list :version version 212 :compression (not (null (member "libz" features))) 213 :protocols protocols 214 :features features))))))) 215 216 (defun elfeed-curl-get-version () 217 "Return the version of curl for `elfeed-curl-program-name'." 218 (plist-get (elfeed-curl-get-capabilities) :version)) 219 (make-obsolete 'elfeed-curl-get-version 'elfeed-curl-get-capabilities "3.0.1") 220 221 (defun elfeed-curl--token () 222 "Return a unique, random string that prints as a symbol without escapes. 223 This token is used to split requests. The % is excluded since 224 it's special to --write-out." 225 (let* ((token (make-string 22 ?=)) 226 (set "!$&*+-/0123456789:<>@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_\ 227 abcdefghijklmnopqrstuvwxyz|~")) 228 (prog1 token ; workaround bug#16206 229 (dotimes (i (- (length token) 2)) 230 (setf (aref token (1+ i)) (aref set (cl-random (length set)))))))) 231 232 (defun elfeed-curl--parse-write-out () 233 "Parse curl's write-out (-w) messages into `elfeed-curl--regions'." 234 (widen) 235 (setf (point) (point-max) 236 elfeed-curl--regions ()) 237 (while (> (point) (point-min)) 238 (search-backward elfeed-curl--token) 239 (cl-decf (point)) 240 (let ((end (point))) 241 (cl-destructuring-bind (_ . header) (read (current-buffer)) 242 (setf (point) end) 243 ;; Find next sentinel token 244 (if (search-backward elfeed-curl--token nil t) 245 (search-forward ")" nil t) 246 (setf (point) (point-min))) 247 (let* ((header-start (point)) 248 (header-end (+ (point) header)) 249 (content-start (+ (point) header)) 250 (content-end end) 251 (regions (list header-start header-end 252 content-start content-end)) 253 (markers (cl-loop for p in regions 254 for marker = (make-marker) 255 collect (set-marker marker p)))) 256 (push markers elfeed-curl--regions)))))) 257 258 (defun elfeed-curl--narrow (kind n) 259 "Narrow to Nth region of KIND (:header, :content)." 260 (let ((region (nth n elfeed-curl--regions))) 261 (cl-destructuring-bind (h-start h-end c-start c-end) region 262 (cl-ecase kind 263 (:header (narrow-to-region h-start h-end)) 264 (:content (narrow-to-region c-start c-end)))))) 265 266 (defun elfeed-curl--parse-http-headers () 267 "Parse the current HTTP response headers into buffer-locals. 268 Sets `elfeed-curl-headers'and `elfeed-curl-status-code'. 269 Use `elfeed-curl--narrow' to select a header." 270 (when (> (- (point-max) (point-min)) 0) 271 (setf (point) (point-max)) 272 (re-search-backward "HTTP/[.0-9]+ +\\([0-9]+\\)") 273 (setf elfeed-curl-status-code (string-to-number (match-string 1))) 274 (cl-loop initially (setf (point) (point-max)) 275 while (re-search-backward "^\\([^:]+\\): +\\([^\r\n]+\\)" nil t) 276 for key = (downcase (match-string 1)) 277 for value = (match-string 2) 278 collect (cons key value) into headers 279 finally (setf elfeed-curl-headers headers)))) 280 281 (defun elfeed-curl--decode () 282 "Try to decode the buffer based on the headers." 283 (let ((content-type (cdr (assoc "Content-Type" elfeed-curl-headers)))) 284 (if (and content-type (string-match "charset=\\(.+\\)" content-type)) 285 (decode-coding-region (point-min) (point-max) 286 (coding-system-from-name 287 (match-string 1 content-type))) 288 (decode-coding-region (point-min) (point-max) 'utf-8)))) 289 290 (defun elfeed-curl--final-location (location headers) 291 "Given start LOCATION and HEADERS, find the final location." 292 (cl-loop for (key . value) in headers 293 when (equal key "location") 294 do (setf location (elfeed-update-location location value)) 295 finally return location)) 296 297 (defun elfeed-curl--args (url token &optional headers method data) 298 "Build an argument list for curl for URL. 299 URL can be a string or a list of URL strings." 300 (let* ((args ()) 301 (capabilities (elfeed-curl-get-capabilities))) 302 (push "--disable" args) 303 (when (plist-get capabilities :compression) 304 (push "--compressed" args)) 305 (push "--silent" args) 306 (push "--location" args) 307 (push (format "-w(%s . %%{size_header})" token) args) 308 (push (format "-m%s" elfeed-curl-timeout) args) 309 (push "-D-" args) 310 (dolist (header headers) 311 (cl-destructuring-bind (key . value) header 312 (push (format "-H%s: %s" key value) args))) 313 (when method (push (format "-X%s" method) args)) 314 (when data (push (format "-d%s" data) args)) 315 (setf args (nconc (reverse elfeed-curl-extra-arguments) args)) 316 (if (listp url) 317 (nconc (nreverse args) url) 318 (nreverse (cons url args))))) 319 320 (defun elfeed-curl--prepare-response (url n protocol) 321 "Prepare response N for delivery to user." 322 (elfeed-curl--narrow :header n) 323 (when (eq protocol 'http) 324 (elfeed-curl--parse-http-headers)) 325 (setf elfeed-curl-location 326 (elfeed-curl--final-location url elfeed-curl-headers)) 327 (elfeed-curl--narrow :content n) 328 (elfeed-curl--decode) 329 (current-buffer)) 330 331 (cl-defun elfeed-curl-retrieve-synchronously (url &key headers method data) 332 "Retrieve the contents for URL and return a new buffer with them. 333 334 HEADERS is an alist of additional headers to add to the HTTP request. 335 METHOD is the HTTP method to use. 336 DATA is the content to include in the request." 337 (with-current-buffer (generate-new-buffer " *curl*") 338 (setf elfeed-curl--token (elfeed-curl--token)) 339 (let ((args (elfeed-curl--args url elfeed-curl--token headers method data)) 340 (coding-system-for-read 'binary)) 341 (apply #'call-process elfeed-curl-program-name nil t nil args)) 342 (elfeed-curl--parse-write-out) 343 (elfeed-curl--prepare-response url 0 (elfeed-curl--protocol-type url)))) 344 345 (defun elfeed-curl--protocol-type (url) 346 (let ((scheme (intern (or (url-type (url-generic-parse-url url)) "nil")))) 347 (cl-case scheme 348 ((https nil) 'http) 349 (otherwise scheme)))) 350 351 (defun elfeed-curl--call-callback (buffer n url cb) 352 "Prepare the buffer for callback N and call it." 353 (let ((result nil) 354 (protocol (elfeed-curl--protocol-type url))) 355 (with-current-buffer buffer 356 (setf elfeed-curl-error-message "unable to parse curl response") 357 (unwind-protect 358 (progn 359 (elfeed-curl--prepare-response url n protocol) 360 (cond ((eq protocol 'file) 361 ;; No status code is returned by curl for file:// urls 362 (setf result t 363 elfeed-curl-error-message nil)) 364 ((eq protocol 'gopher) 365 (setf result t 366 elfeed-curl-error-message nil 367 elfeed-curl-status-code nil)) 368 ((and (>= elfeed-curl-status-code 400) 369 (<= elfeed-curl-status-code 599)) 370 (setf elfeed-curl-error-message 371 (format "HTTP %d" elfeed-curl-status-code))) 372 (t 373 (setf result t 374 elfeed-curl-error-message nil))) 375 ;; Always call callback 376 (unwind-protect 377 (funcall cb result) 378 ;; Always clean up 379 (when (zerop (cl-decf elfeed-curl--refcount)) 380 (kill-buffer)))))))) 381 382 (defun elfeed-curl--fail-callback (buffer cb) 383 "Inform the callback the request failed." 384 (with-current-buffer buffer 385 (unwind-protect 386 (funcall cb nil) 387 (when (zerop (cl-decf elfeed-curl--refcount)) 388 (kill-buffer))))) 389 390 (defun elfeed-curl--sentinel (process status) 391 "Manage the end of a curl process' life." 392 (let ((buffer (process-buffer process))) 393 (with-current-buffer buffer 394 ;; Fire off callbacks in separate interpreter turns so they can 395 ;; each fail in isolation from each other. 396 (if (equal status "finished\n") 397 (cl-loop with handler = #'elfeed-curl--call-callback 398 initially do (elfeed-curl--parse-write-out) 399 for (url . cb) in elfeed-curl--requests 400 for n upfrom 0 401 do (run-at-time 0 nil handler buffer n url cb)) 402 (if (string-match "exited abnormally with code \\([0-9]+\\)" status) 403 (let* ((code (string-to-number (match-string 1 status))) 404 (message (cdr (assoc code elfeed-curl--error-codes)))) 405 (setf elfeed-curl-error-message 406 (format "(%d) %s" code 407 (or message "Unknown curl error!")))) 408 (setf elfeed-curl-error-message status)) 409 (cl-loop with handler = #'elfeed-curl--fail-callback 410 for (_ . cb) in elfeed-curl--requests 411 do (run-at-time 0 nil handler buffer cb)))))) 412 413 (cl-defun elfeed-curl-retrieve (url cb &key headers method data) 414 "Retrieve URL contents asynchronously, calling CB with one status argument. 415 416 The callback must *not* kill the buffer! 417 418 The destination buffer is set at the current buffer for the 419 callback. 420 421 HEADERS is an alist of additional headers to add to HTTP requests. 422 METHOD is the HTTP method to use. 423 DATA is the content to include in the request. 424 425 URL can be a list of URLs, which will fetch them all in the same 426 curl process. In this case, CB can also be either a list of the 427 same length, or just a single function to be called once for each 428 URL in the list. Headers will be common to all requests. A TCP or 429 DNS failure in one will cause all to fail, but 4xx and 5xx 430 results will not." 431 (with-current-buffer (generate-new-buffer " *curl*") 432 (setf elfeed-curl--token (elfeed-curl--token)) 433 (let* ((coding-system-for-read 'binary) 434 (process-connection-type nil) 435 (args (elfeed-curl--args url elfeed-curl--token headers method data)) 436 (process (apply #'start-process "elfeed-curl" (current-buffer) 437 elfeed-curl-program-name args))) 438 (prog1 process 439 (if (listp url) 440 (progn 441 (when (functionp cb) 442 (setf cb (make-list (length url) cb))) 443 (setf elfeed-curl--requests (cl-mapcar #'cons url cb) 444 elfeed-curl--refcount (length url))) 445 (push (cons url cb) elfeed-curl--requests) 446 (setf elfeed-curl--refcount 1)) 447 (set-process-query-on-exit-flag process nil) 448 (setf (process-sentinel process) #'elfeed-curl--sentinel))))) 449 450 (defun elfeed-curl--request-key (url headers method data) 451 "Try to fetch URLs with matching keys at the same time." 452 (unless (listp url) 453 (let* ((urlobj (url-generic-parse-url url))) 454 (list (url-type urlobj) 455 (url-host urlobj) 456 (url-portspec urlobj) 457 headers 458 method 459 data)))) 460 461 (defun elfeed-curl--queue-consolidate (queue-in) 462 "Group compatible requests together and return a new queue. 463 Compatible means the requests have the same protocol, domain, 464 port, headers, method, and body, allowing them to be used safely 465 in the same curl invocation." 466 (let ((table (make-hash-table :test 'equal)) 467 (keys ()) 468 (queue-out ())) 469 (dolist (entry queue-in) 470 (cl-destructuring-bind (url _ headers method data) entry 471 (let* ((key (elfeed-curl--request-key url headers method data))) 472 (push key keys) 473 (push entry (gethash key table nil))))) 474 (dolist (key (nreverse keys)) 475 (let ((entry (gethash key table))) 476 (when entry 477 (let ((rotated (list (nreverse (cl-mapcar #'car entry)) 478 (nreverse (cl-mapcar #'cadr entry)) 479 (cl-caddar entry) 480 (elt (car entry) 3) 481 (elt (car entry) 4)))) 482 (push rotated queue-out) 483 (setf (gethash key table) nil))))) 484 (nreverse queue-out))) 485 486 (defun elfeed-curl--queue-wrap (cb) 487 "Wrap the curl CB so that it operates the queue." 488 (lambda (status) 489 (cl-decf elfeed-curl-queue-active) 490 (elfeed-curl--run-queue) 491 (funcall cb status))) 492 493 (defvar elfeed-curl--run-queue-queued nil 494 "Non-nil if run-queue has already been queued for the next turn.") 495 496 (defun elfeed-curl--run-queue () 497 "Possibly fire off some new requests." 498 (when elfeed-curl--run-queue-queued 499 (setf elfeed-curl--run-queue-queued nil 500 ;; Try to consolidate the new requests. 501 elfeed-curl-queue 502 (elfeed-curl--queue-consolidate elfeed-curl-queue))) 503 (while (and (< elfeed-curl-queue-active elfeed-curl-max-connections) 504 (> (length elfeed-curl-queue) 0)) 505 (cl-destructuring-bind (url cb headers method data) (pop elfeed-curl-queue) 506 (elfeed-log 'debug "retrieve %s" url) 507 (cl-incf elfeed-curl-queue-active 1) 508 (elfeed-curl-retrieve 509 url 510 (if (functionp cb) 511 (elfeed-curl--queue-wrap cb) 512 (cons (elfeed-curl--queue-wrap (car cb)) 513 (cdr cb))) 514 :headers headers 515 :method method 516 :data data)))) 517 518 (cl-defun elfeed-curl-enqueue (url cb &key headers method data) 519 "Just like `elfeed-curl-retrieve', but restricts concurrent fetches." 520 (unless (or (stringp url) 521 (and (listp url) (cl-every #'stringp url))) 522 ;; Signal error synchronously instead of asynchronously in the timer 523 (signal 'wrong-type-argument (list 'string-p-or-string-list-p url))) 524 (let ((entry (list url cb headers method data))) 525 (setf elfeed-curl-queue (nconc elfeed-curl-queue (list entry))) 526 (unless elfeed-curl--run-queue-queued 527 (run-at-time 0 nil #'elfeed-curl--run-queue) 528 (setf elfeed-curl--run-queue-queued t)))) 529 530 (provide 'elfeed-curl) 531 532 ;;; elfeed-curl.el ends here