dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

elfeed-csv.el (5733B)


      1 ;;; elfeed-csv.el --- export database to CSV files -*- lexical-binding: t; -*-
      2 
      3 ;;; Commentary:
      4 
      5 ;; The `elfeed-csv-export' docstring has a SQL schema recommendation.
      6 ;; Given these schemas, these CSV files are trivially imported into a
      7 ;; SQLite database using the sqlite3 command line program:
      8 
      9 ;;   sqlite> .mode csv
     10 ;;   sqlite> .import feeds.csv feeds
     11 ;;   sqlite> .import entries.csv entries
     12 ;;   sqlite> .import tags.csv tags
     13 
     14 ;; Note: nil values won't be imported as NULL, but as empty strings.
     15 
     16 ;; Here are a few interesting queries to make on your own data:
     17 
     18 ;; For each tag in your database, compute a histogram of posts with
     19 ;; 1-hour bins across the the day (0-23), in your local timezone.
     20 
     21 ;; SELECT tag,
     22 ;;        cast(strftime('%H', date, 'unixepoch', 'localtime') AS INT) AS hour,
     23 ;;        count(id) AS count
     24 ;; FROM entries
     25 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed
     26 ;; GROUP BY tag, hour;
     27 
     28 ;; Like above, but per week-day (0-6).
     29 
     30 ;; SELECT tag,
     31 ;;        cast(strftime('%w', date, 'unixepoch', 'localtime') AS INT) AS day,
     32 ;;        count(id) AS count
     33 ;; FROM entries
     34 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed
     35 ;; GROUP BY tag, day;
     36 
     37 ;; For each feed, compute the number of entries and last entry date.
     38 
     39 ;; SELECT feeds.title AS title,
     40 ;;        count(url) AS entry_count,
     41 ;;        datetime(max(date), 'unixepoch') AS last_entry_date
     42 ;; FROM feeds
     43 ;; JOIN entries ON feeds.url = entries.feed
     44 ;; GROUP BY url
     45 ;; ORDER BY max(date) DESC;
     46 
     47 ;; Compute a histogram of entry title lengths.
     48 
     49 ;; SELECT length(title) AS length,
     50 ;;        count(*) AS count
     51 ;; FROM entries
     52 ;; GROUP BY length
     53 ;; ORDER BY length;
     54 
     55 ;; Again, but this time group by tag.
     56 
     57 ;; SELECT tag,
     58 ;;        length(title) AS length,
     59 ;;        count(*) AS count
     60 ;; FROM entries
     61 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed
     62 ;; GROUP BY tag, length
     63 ;; ORDER BY length;
     64 
     65 ;; What's the relationship between title length and time of day of an
     66 ;; entry? (Scatter plot this result.)
     67 
     68 ;; SELECT (date % (24*60*60)) / (24*60*60) AS day_time,
     69 ;;        length(title) AS length
     70 ;; FROM entries
     71 ;; JOIN tags ON tags.entry = entries.id AND tags.feed = entries.feed;
     72 
     73 ;;; Code:
     74 
     75 (require 'cl-lib)
     76 (require 'elfeed-db)
     77 
     78 (defvar elfeed-csv-nil ""
     79   "The string representation to use for nil.
     80 Consider let-binding this around your `elfeed-csv-quote' call.")
     81 
     82 (defun elfeed-csv-quote (sexp)
     83   "Return CSV string representation of SEXP."
     84   (cond ((null sexp)
     85          elfeed-csv-nil)
     86         ((not (stringp sexp))
     87          (elfeed-csv-quote (prin1-to-string sexp)))
     88         ((string-match-p "[\"\n,]" sexp)
     89          (concat "\"" (replace-regexp-in-string "\"" "\"\"" sexp) "\""))
     90         (sexp)))
     91 
     92 (defun elfeed-csv-insert (seq)
     93   "Insert a row of CSV data to the current buffer."
     94   (cl-loop for value being the elements of seq
     95            for column upfrom 0
     96            when (> column 0)
     97            do (insert ",")
     98            do (insert (elfeed-csv-quote value))
     99            finally (newline)))
    100 
    101 (cl-defun elfeed-csv-export (feeds-file entries-file tags-file &key headers-p)
    102   "Create separate CSV files for feeds, entries, and tags.
    103 
    104 These CSV files are intended for an analysis of an Elfeed
    105 database. They are suitable for importing as tables into a
    106 relational database such as SQLite. Here's the recommended SQL
    107 schema, reflecting the structure of the data.
    108 
    109 CREATE TABLE feeds (
    110     url TEXT PRIMARY KEY,
    111     title TEXT,
    112     canonical_url TEXT,
    113     author TEXT
    114 );
    115 
    116 CREATE TABLE entries (
    117     id TEXT NOT NULL,
    118     feed TEXT NOT NULL REFERENCES feeds (url),
    119     title TEXT,
    120     link TEXT NOT NULL,
    121     date REAL NOT NULL,
    122     PRIMARY KEY (id, feed)
    123 );
    124 
    125 CREATE TABLE tags (
    126     entry TEXT NOT NULL,
    127     feed TEXT NOT NULL,
    128     tag TEXT NOT NULL,
    129     FOREIGN KEY (entry, feed) REFERENCES entries (id, feed)
    130 );"
    131   (let ((feeds-buffer (generate-new-buffer " *csv-feeds*"))
    132         (entries-buffer (generate-new-buffer " *csv-entries*"))
    133         (tags-buffer (generate-new-buffer " *csv-tags*"))
    134         (seen (make-hash-table :test 'eq)))
    135     ;; Write headers
    136     (when headers-p
    137       (with-current-buffer feeds-buffer
    138         (elfeed-csv-insert [url title canonical-url author]))
    139       (with-current-buffer entries-buffer
    140         (elfeed-csv-insert [id feed title link date]))
    141       (with-current-buffer tags-buffer
    142         (elfeed-csv-insert [entry feed tag])))
    143     ;; Write data
    144     (with-elfeed-db-visit (entry feed)
    145       (unless (gethash feed seen)
    146         (setf (gethash feed seen) t)
    147         (let ((url (elfeed-feed-url feed))
    148               (title (elfeed-feed-title feed))
    149               (canonical-url (elfeed-meta feed :canonical-url))
    150               (author (elfeed-feed-author feed)))
    151           (with-current-buffer feeds-buffer
    152             (elfeed-csv-insert (list url title canonical-url author)))))
    153       (let ((id (cdr (elfeed-entry-id entry)))
    154             (feed-id (elfeed-entry-feed-id entry))
    155             (title (elfeed-entry-title entry))
    156             (link (elfeed-entry-link entry))
    157             (date (elfeed-entry-date entry)))
    158         (with-current-buffer entries-buffer
    159           (elfeed-csv-insert (list id feed-id title link date)))
    160         (with-current-buffer tags-buffer
    161           (dolist (tag (elfeed-entry-tags entry))
    162             (elfeed-csv-insert (list id feed-id tag))))))
    163     ;; Write files
    164     (with-current-buffer tags-buffer
    165       (write-region nil nil tags-file nil 0)
    166       (kill-buffer))
    167     (with-current-buffer entries-buffer
    168       (write-region nil nil entries-file nil 0)
    169       (kill-buffer))
    170     (with-current-buffer feeds-buffer
    171       (write-region nil nil feeds-file nil 0)
    172       (kill-buffer))))
    173 
    174 (provide 'elfeed-csv)
    175 
    176 ;;; elfeed-csv.el ends here