elfeed-csv.el (5733B)
1 ;;; elfeed-csv.el --- export database to CSV files -*- lexical-binding: t; -*- 2 3 ;;; Commentary: 4 5 ;; The `elfeed-csv-export' docstring has a SQL schema recommendation. 6 ;; Given these schemas, these CSV files are trivially imported into a 7 ;; SQLite database using the sqlite3 command line program: 8 9 ;; sqlite> .mode csv 10 ;; sqlite> .import feeds.csv feeds 11 ;; sqlite> .import entries.csv entries 12 ;; sqlite> .import tags.csv tags 13 14 ;; Note: nil values won't be imported as NULL, but as empty strings. 15 16 ;; Here are a few interesting queries to make on your own data: 17 18 ;; For each tag in your database, compute a histogram of posts with 19 ;; 1-hour bins across the the day (0-23), in your local timezone. 20 21 ;; SELECT tag, 22 ;; cast(strftime('%H', date, 'unixepoch', 'localtime') AS INT) AS hour, 23 ;; count(id) AS count 24 ;; FROM entries 25 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed 26 ;; GROUP BY tag, hour; 27 28 ;; Like above, but per week-day (0-6). 29 30 ;; SELECT tag, 31 ;; cast(strftime('%w', date, 'unixepoch', 'localtime') AS INT) AS day, 32 ;; count(id) AS count 33 ;; FROM entries 34 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed 35 ;; GROUP BY tag, day; 36 37 ;; For each feed, compute the number of entries and last entry date. 38 39 ;; SELECT feeds.title AS title, 40 ;; count(url) AS entry_count, 41 ;; datetime(max(date), 'unixepoch') AS last_entry_date 42 ;; FROM feeds 43 ;; JOIN entries ON feeds.url = entries.feed 44 ;; GROUP BY url 45 ;; ORDER BY max(date) DESC; 46 47 ;; Compute a histogram of entry title lengths. 48 49 ;; SELECT length(title) AS length, 50 ;; count(*) AS count 51 ;; FROM entries 52 ;; GROUP BY length 53 ;; ORDER BY length; 54 55 ;; Again, but this time group by tag. 56 57 ;; SELECT tag, 58 ;; length(title) AS length, 59 ;; count(*) AS count 60 ;; FROM entries 61 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed 62 ;; GROUP BY tag, length 63 ;; ORDER BY length; 64 65 ;; What's the relationship between title length and time of day of an 66 ;; entry? (Scatter plot this result.) 67 68 ;; SELECT (date % (24*60*60)) / (24*60*60) AS day_time, 69 ;; length(title) AS length 70 ;; FROM entries 71 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed; 72 73 ;;; Code: 74 75 (require 'cl-lib) 76 (require 'elfeed-db) 77 78 (defvar elfeed-csv-nil "" 79 "The string representation to use for nil. 80 Consider let-binding this around your `elfeed-csv-quote' call.") 81 82 (defun elfeed-csv-quote (sexp) 83 "Return CSV string representation of SEXP." 84 (cond ((null sexp) 85 elfeed-csv-nil) 86 ((not (stringp sexp)) 87 (elfeed-csv-quote (prin1-to-string sexp))) 88 ((string-match-p "[\"\n,]" sexp) 89 (concat "\"" (replace-regexp-in-string "\"" "\"\"" sexp) "\"")) 90 (sexp))) 91 92 (defun elfeed-csv-insert (seq) 93 "Insert a row of CSV data to the current buffer." 94 (cl-loop for value being the elements of seq 95 for column upfrom 0 96 when (> column 0) 97 do (insert ",") 98 do (insert (elfeed-csv-quote value)) 99 finally (newline))) 100 101 (cl-defun elfeed-csv-export (feeds-file entries-file tags-file &key headers-p) 102 "Create separate CSV files for feeds, entries, and tags. 103 104 These CSV files are intended for an analysis of an Elfeed 105 database. They are suitable for importing as tables into a 106 relational database such as SQLite. Here's the recommended SQL 107 schema, reflecting the structure of the data. 108 109 CREATE TABLE feeds ( 110 url TEXT PRIMARY KEY, 111 title TEXT, 112 canonical_url TEXT, 113 author TEXT 114 ); 115 116 CREATE TABLE entries ( 117 id TEXT NOT NULL, 118 feed TEXT NOT NULL REFERENCES feeds (url), 119 title TEXT, 120 link TEXT NOT NULL, 121 date REAL NOT NULL, 122 PRIMARY KEY (id, feed) 123 ); 124 125 CREATE TABLE tags ( 126 entry TEXT NOT NULL, 127 feed TEXT NOT NULL, 128 tag TEXT NOT NULL, 129 FOREIGN KEY (entry, feed) REFERENCES entries (id, feed) 130 );" 131 (let ((feeds-buffer (generate-new-buffer " *csv-feeds*")) 132 (entries-buffer (generate-new-buffer " *csv-entries*")) 133 (tags-buffer (generate-new-buffer " *csv-tags*")) 134 (seen (make-hash-table :test 'eq))) 135 ;; Write headers 136 (when headers-p 137 (with-current-buffer feeds-buffer 138 (elfeed-csv-insert [url title canonical-url author])) 139 (with-current-buffer entries-buffer 140 (elfeed-csv-insert [id feed title link date])) 141 (with-current-buffer tags-buffer 142 (elfeed-csv-insert [entry feed tag]))) 143 ;; Write data 144 (with-elfeed-db-visit (entry feed) 145 (unless (gethash feed seen) 146 (setf (gethash feed seen) t) 147 (let ((url (elfeed-feed-url feed)) 148 (title (elfeed-feed-title feed)) 149 (canonical-url (elfeed-meta feed :canonical-url)) 150 (author (elfeed-feed-author feed))) 151 (with-current-buffer feeds-buffer 152 (elfeed-csv-insert (list url title canonical-url author))))) 153 (let ((id (cdr (elfeed-entry-id entry))) 154 (feed-id (elfeed-entry-feed-id entry)) 155 (title (elfeed-entry-title entry)) 156 (link (elfeed-entry-link entry)) 157 (date (elfeed-entry-date entry))) 158 (with-current-buffer entries-buffer 159 (elfeed-csv-insert (list id feed-id title link date))) 160 (with-current-buffer tags-buffer 161 (dolist (tag (elfeed-entry-tags entry)) 162 (elfeed-csv-insert (list id feed-id tag)))))) 163 ;; Write files 164 (with-current-buffer tags-buffer 165 (write-region nil nil tags-file nil 0) 166 (kill-buffer)) 167 (with-current-buffer entries-buffer 168 (write-region nil nil entries-file nil 0) 169 (kill-buffer)) 170 (with-current-buffer feeds-buffer 171 (write-region nil nil feeds-file nil 0) 172 (kill-buffer)))) 173 174 (provide 'elfeed-csv) 175 176 ;;; elfeed-csv.el ends here