dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

lexic.el (86326B)


      1 ;;; lexic.el --- A major mode to find out more about words -*- lexical-binding: t; -*-
      2 
      3 ;; Copyright 2006~2008 pluskid,
      4 ;;           2011~2012 gucong
      5 ;;           2020~2021 tecosaur
      6 
      7 ;; Author: pluskid <pluskid@gmail.com>,
      8 ;;         gucong <gucong43216@gmail.com>,
      9 ;;         TEC <tec@tecosaur.com>
     10 ;;
     11 ;; Maintainer: TEC <tec@tecosaur.com>
     12 ;; Version: 0.0.1
     13 ;; Package-Version: 20220501.1432
     14 ;; Package-Commit: f9b3de4d9c2dd1ce5022383e1a504b87bf7d1b09
     15 ;; Homepage: https://github.com/tecosaur/lexic
     16 ;; Package-Requires: ((emacs "26.3"))
     17 
     18 ;;; License:
     19 
     20 ;; This program is free software; you can redistribute it and/or
     21 ;; modify it under the terms of the GNU General Public License as
     22 ;; published by the Free Software Foundation; either version 2, or (at
     23 ;; your option) any later version.
     24 ;;
     25 ;; This program is distributed in the hope that it will be useful,
     26 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     27 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     28 ;; GNU General Public License for more details.
     29 ;;
     30 ;; You should have received a copy of the GNU General Public License
     31 ;; along with this program; if not, write to the Free Software
     32 ;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     33 
     34 ;;; Commentary:
     35 
     36 ;; This provides a major mode to view the output of dictionary tools,
     37 ;; and utilities that perform searches and nicely format the results.
     38 ;;
     39 ;; Currently tied to sdcv, but this is intended to be changed in the future.
     40 
     41 ;; Put this file into your load-path and the following into your
     42 ;; ~/.emacs:
     43 ;;   (require 'lexic-mode)
     44 ;;   (global-set-key (kbd "C-c d") 'lexic-search)
     45 
     46 ;;; Changelog:
     47 
     48 ;; 2020/07/28
     49 ;;     * New variable: `lexic-dictionary-specs', allows for
     50 ;;       - Dictionary display names
     51 ;;       - Custom dictionary entry formatters
     52 ;;       - Dictionary sorting
     53 ;;     * Update outline function calls to replace depreciated names.'
     54 ;;     * Tweak lexic-mode
     55 ;;       - Remove font-locking
     56 ;;       - Change `outline-regexp' to ZERO WIDTH SPACE
     57 ;;       - Add `outline-heading-end-regexp', a PUNCTUATION SPACE
     58 ;;       - Expand the mode map, to bind
     59 ;;         - Two modes of entry navigation
     60 ;;         - History navigation
     61 ;;         - TAB for toggling an entry
     62 ;;     * Expand popup window
     63 ;;     * Add linear history navigation
     64 ;;     * Revise behaviour of `lexic-next-entry' and `lexic-previous-entry'
     65 ;;     * New function: `lexic-get-outline-path' which gives the structural path
     66 ;;       to the current position in buffer, e.g. dict → word v. t. → 1. (Chem.)
     67 ;;     * Remove (now unused) custom face vars, could do with adding some
     68 ;;       new face vars in the future
     69 ;;     * Change the default of `lexic-program-path' to be an absolute path
     70 ;;     * New functions: `lexic-format-result', `lexic-failed-p',
     71 ;;       and `lexic-format-failure' to handle the upgraded entry processing
     72 ;;     * New functions: `lexic-format-webster', `lexic-format-online-etym',
     73 ;;       `lexic-format-element', and `lexic-format-soule' to format
     74 ;;       the dictionaries recognised by default in `lexic-dictionary-specs'.
     75 ;;       - with helper functions `lexic-format-webster-diacritics', and
     76 ;;         `lexic-format-expand-abbreviations' for nicer content.
     77 
     78 ;; 2012/01/02
     79 ;;     * New variable: `lexic-word-processor'
     80 ;;     * Breaking change:
     81 ;;       for `lexic-dictionary-alist', non-list (non-nil) value now means full dictionary list
     82 ;;     * Rewrite `lexic-search' for both interactive and non-interactive use
     83 ;;     * `lexic-dictionary-list' is left for customization use only
     84 ;;     * Better highlighting.
     85 ;;
     86 ;; 2011/06/30
     87 ;;     * New feature: parse output for failed lookup
     88 ;;     * Keymap modification
     89 ;;
     90 ;; 2008/06/11
     91 ;;     * lexic-mode v 0.1 init (with background process)
     92 
     93 ;;; Code:
     94 
     95 (require 'outline)
     96 (require 'visual-fill-column nil t)
     97 (require 'cl-lib)
     98 (require 'subr-x)
     99 
    100 (declare-function spell-fu-mode "spell-fu")
    101 
    102 ;;;;##################################################################
    103 ;;;;  User Options, Variables
    104 ;;;;##################################################################
    105 
    106 (defvar lexic-buffer-name "*lexic*"
    107   "The name of the buffer of lexic.")
    108 (defvar lexic-dictionary-list t
    109   "A list of dictionaries to use.
    110 Each entry is a string denoting the name of a dictionary, which
    111 is then passed to lexic through the '-u' command line option.
    112 Any non-list value means using all the dictionaries.")
    113 (defvar lexic-dictionary-alist nil
    114   "An alist of dictionaries, used to interactively form the dictionary list.
    115 It has the form:
    116    ((\"full\" . t)
    117     (\"group1\" \"dict1\" \"dict2\" ...)
    118     (\"group2\" \"dict2\" \"dict3\"))
    119 Any cons cell here means using all dictionaries.")
    120 
    121 (defvar lexic-program-path (executable-find "sdcv")
    122   "The path of lexic program.")
    123 
    124 (defvar lexic-dictionary-path nil
    125   "The path to the dictionaries.")
    126 
    127 (defvar lexic-word-processor nil
    128   "This is the function that take a word (stirng)
    129 and return a word or a list of words for lookup by `lexic-search'.
    130 All lookup result(s) will finally be concatenated together.
    131 
    132 nil value means do nothing with the original word.
    133 
    134 The following is an example.  This function takes the original word and
    135 compare whether simplified and traditional form of the word are the same.
    136 If not, look up both of the words.
    137 
    138       (lambda (word)
    139         (let ((sim (chinese-conv word \"simplified\"))
    140               (tra (chinese-conv word \"traditional\")))
    141           (if (not (string= sim tra))
    142               (list sim tra)
    143             word)))
    144 ")
    145 
    146 
    147 (defvar lexic-current-dictionary-list nil
    148   "A list of dictionaries to use in searches.
    149 Either entries from `lexic-dictionary-alist', or any non-list value,
    150 which will cause all avalible dictionaries to be used.")
    151 
    152 (defvar lexic-wait-timeout 2
    153   "The max time (in seconds) to wait for the lexic process to produce some output.")
    154 (defvar lexic-wait-interval 0.1
    155   "The interval (in seconds) to sleep each time to wait for lexic's output.")
    156 
    157 (defconst lexic-process-name " %lexic-mode-process%")
    158 (defconst lexic-process-buffer-name " *lexic-mode-process*")
    159 
    160 (defvar lexic-word-prompts '("Enter word or phrase: ")
    161   "A list of prompts that lexic use to prompt for word.")
    162 
    163 (defvar lexic-choice-prompts '("Your choice[-1 to abort]: ")
    164   "A list of prompts that lexic use to prompt for a choice of multiple candidates.")
    165 
    166 (defvar lexic-result-patterns '("^Found [0-9]+ items, similar to [*?/|]*\\(.+?\\)[*?]*\\.")
    167   "A list of patterns to extract result word of lexic.
    168 Special characters are stripped.")
    169 
    170 (defvar lexic--search-history nil)
    171 (defvar lexic--search-history-position -1)
    172 
    173 (defvar lexic-expand-abbreviations t
    174   "Whether or not to try to expand abbreviations, where they are expected.")
    175 
    176 
    177 ;;; ==================================================================
    178 ;;; Frontend, search word and display lexic buffer
    179 
    180 ;;;###autoload
    181 (defun lexic-search (word &optional dict-list-name dict-list interactive-p no-history-p)
    182   "Search WORD through the command line tool lexic.
    183 The result will be displayed in buffer named with
    184 `lexic-buffer-name' with `lexic-mode' if called interactively.
    185 
    186 When provided with DICT-LIST-NAME, query `lexic-dictionary-alist'
    187 to get the new dictionary list before search.
    188 Alternatively, dictionary list can be specified directly
    189 by DICT-LIST.  Any non-list value of it means using all dictionaries.
    190 
    191 When called interactively, prompt for the word.
    192 Prefix argument have the following meaning:
    193 If `lexic-dictionary-alist' is defined,
    194 use prefix argument to select a new DICT-LIST-NAME.
    195 Otherwise, prefix argument means using all dictionaries.
    196 
    197 When INTERACTIVE-P is non-nil, a buffer displaying the result(s) is shown.
    198 Otherwise, the result is returned as a string.
    199 
    200 When NO-HISTORY-P is non-nil, the search is not added to the session history.
    201 
    202 Word may contain some special characters:
    203     *       match zero or more characters
    204     ?       match zero or one character
    205     /       used at the beginning, for fuzzy search
    206     |       used at the beginning, for data search
    207     \       escape the character right after
    208 
    209 TODO decouple the tool from the general method."
    210   (interactive
    211    (let* ((dict-list-name
    212            (and current-prefix-arg lexic-dictionary-alist
    213                 (completing-read "Select dictionary list: "
    214                                  lexic-dictionary-alist nil t)))
    215           (dict-list
    216            (and current-prefix-arg (not lexic-dictionary-alist)))
    217           (guess (or (and transient-mark-mode mark-active
    218                           (buffer-substring-no-properties
    219                            (region-beginning) (region-end)))
    220                      (current-word nil t)
    221                      "lexical"))
    222           (word (read-string (format "Search dict (default: %s): " guess)
    223                              nil nil guess)))
    224      (list word dict-list-name dict-list t)))
    225   ;; init current dictionary list
    226   (unless lexic-current-dictionary-list
    227     (setq lexic-current-dictionary-list lexic-dictionary-list))
    228   ;; dict-list-name to dict-list
    229   (when (and (not dict-list) dict-list-name)
    230     (if (not lexic-dictionary-alist)
    231         (error "`lexic-dictionary-alist' not defined"))
    232     (setq dict-list
    233           (cdr (assoc dict-list-name lexic-dictionary-alist))))
    234   ;; prepare new dictionary list
    235   (when (and dict-list (not (equal lexic-current-dictionary-list dict-list)))
    236     (setq lexic-current-dictionary-list dict-list)
    237     ;; kill lexic process
    238     (and (get-process lexic-process-name)
    239          (kill-process (get-process lexic-process-name)))
    240     (while (get-process lexic-process-name)
    241       (sleep-for 0.01)))
    242   (let ((result
    243          (mapconcat
    244           (lambda (w) (lexic-do-lookup w))
    245           (if lexic-word-processor
    246               (let ((processed (funcall lexic-word-processor word)))
    247                 (if (listp processed) processed (list processed)))
    248             (list word))
    249           "")))
    250     (unless (or no-history-p (string= word
    251                                       (nth lexic--search-history-position
    252                                            lexic--search-history)))
    253       (setq lexic--search-history
    254             (append (cl-subseq lexic--search-history
    255                                0 (1+ lexic--search-history-position))
    256                     (list word))
    257             lexic--search-history-position (1- (length lexic--search-history))))
    258     (if (not interactive-p)
    259         result
    260       (with-current-buffer (get-buffer-create lexic-buffer-name)
    261         (setq buffer-read-only nil)
    262         (erase-buffer)
    263         (insert result))
    264       (lexic-goto-lexic)
    265       (lexic-mode)
    266       (lexic-mode-reinit)
    267       (let* ((window (get-buffer-window (lexic-get-buffer)))
    268              (height (window-height window))
    269              (min-height (pcase (count-lines (point-min) (point-max))
    270                            ((pred (> 50)) 12)
    271                            ((pred (> 100)) 16)
    272                            (_ 20))))
    273         (when (> min-height height)
    274           (window-resize window (- 12 height)))))))
    275 
    276 ;;;###autoload
    277 (defun lexic-search-word-at-point ()
    278   "Perform `lexic-search' on the word at or near point."
    279   (interactive)
    280   (lexic-search
    281    (downcase
    282     (or (and transient-mark-mode mark-active
    283              (buffer-substring-no-properties
    284               (region-beginning) (region-end)))
    285         (current-word nil t)
    286         "lexical"))
    287    nil nil t))
    288 
    289 ;;;###autoload
    290 (defun lexic-list-dictionary ()
    291   "Show available dictionaries."
    292   (interactive)
    293   (let (resize-mini-windows)
    294     (shell-command (concat lexic-program-path " -l") lexic-buffer-name)))
    295 
    296 (defun lexic-generate-dictionary-argument ()
    297   "Generate the appropriate stcv dictionary argument.
    298 Using `lexic-current-dictionary-list' and `lexic-dictionary-path'."
    299   (append
    300    (and lexic-dictionary-path (list "--data-dir" (expand-file-name lexic-dictionary-path)))
    301    (and (listp lexic-current-dictionary-list)
    302         (mapcan (lambda (dict)
    303                   (list "-u" dict))
    304                 lexic-current-dictionary-list))))
    305 
    306 (defun lexic-search-history-backwards ()
    307   "Show the previous word searched."
    308   (interactive)
    309   (if (> lexic--search-history-position 0)
    310       (lexic-search (nth (setq lexic--search-history-position
    311                                (1- lexic--search-history-position))
    312                          lexic--search-history)
    313                     nil nil t t)
    314     (message "At start of search history.")))
    315 
    316 (defun lexic-search-history-forwards ()
    317   "Show the next word searched."
    318   (interactive)
    319   (if (> (length lexic--search-history) lexic--search-history-position)
    320       (lexic-search (nth (setq lexic--search-history-position
    321                                (1+ lexic--search-history-position))
    322                          lexic--search-history)
    323                     nil nil t t)
    324     (message "At end of search history.")))
    325 
    326 ;;; ==================================================================
    327 ;;; utilities to switch from and to lexic buffer
    328 (defvar lexic-previous-window-conf nil
    329   "Window configuration before switching to lexic buffer.")
    330 (defun lexic-goto-lexic ()
    331   "Switch to lexic buffer in other window."
    332   (interactive)
    333   (unless (eq (current-buffer)
    334               (lexic-get-buffer))
    335     (setq lexic-previous-window-conf (current-window-configuration)))
    336   (let* ((buffer (lexic-get-buffer))
    337          (window (get-buffer-window buffer)))
    338     (if (null window)
    339         (switch-to-buffer-other-window buffer)
    340       (select-window window))))
    341 
    342 (defun lexic-return-from-lexic ()
    343   "Bury lexic buffer and restore the previous window configuration."
    344   (interactive)
    345   (kill-process (get-process lexic-process-name))
    346   (if (window-configuration-p lexic-previous-window-conf)
    347       (progn
    348         (set-window-configuration lexic-previous-window-conf)
    349         (setq lexic-previous-window-conf nil)
    350         (bury-buffer (lexic-get-buffer)))
    351     (bury-buffer)))
    352 
    353 (defun lexic-get-buffer ()
    354   "Get the lexic buffer. Create one if there's none."
    355   (let ((buffer (get-buffer-create lexic-buffer-name)))
    356     (with-current-buffer buffer
    357       (unless (eq major-mode 'lexic-mode)
    358         (lexic-mode)))
    359     buffer))
    360 
    361 ;;; ==================================================================
    362 
    363 (defvar lexic-mode-map
    364   (let ((map (copy-keymap special-mode-map)))
    365     (define-key map "q" 'lexic-return-from-lexic)
    366     (define-key map (kbd "RET") 'lexic-search-word-at-point)
    367     (define-key map "a" 'outline-show-all)
    368     (define-key map "h" 'outline-hide-body)
    369     (define-key map "o" 'lexic-toggle-entry)
    370     (define-key map (kbd "TAB") 'lexic-toggle-entry)
    371     (define-key map "n" 'lexic-next-entry)
    372     (define-key map "N" (lambda () (interactive) (lexic-next-entry t)))
    373     (define-key map "p" 'lexic-previous-entry)
    374     (define-key map "P" (lambda () (interactive) (lexic-previous-entry t)))
    375     (define-key map "b" 'lexic-search-history-backwards)
    376     (define-key map "f" 'lexic-search-history-forwards)
    377     (set-keymap-parent map special-mode-map)
    378     map)
    379   "Keymap for `lexic-mode'.")
    380 
    381 
    382 (define-derived-mode lexic-mode fundamental-mode "lexic"
    383   "Major mode to look up word through lexic.
    384 \\{lexic-mode-map}
    385 Turning on lexic mode runs the normal hook `lexic-mode-hook'.
    386 
    387 This mode locally removes any `spell-fu-mode' or `flyspell-mode' entries in
    388 `text-mode-hook', but won't catch any other spell-checking initialisation.
    389 Consider resolving any edge cases with an addition to `lexic-mode-hook'."
    390   (setq buffer-read-only t)
    391   (add-hook 'kill-buffer-hook
    392             (lambda ()
    393               (let ((proc (get-process lexic-process-name)))
    394                 (when (process-live-p proc)
    395                   (kill-process proc))))
    396             nil t)
    397   (setq-local outline-regexp "\u200B+")
    398   (setq-local outline-heading-end-regexp "\u2008")
    399   (when (featurep 'visual-fill-column)
    400     (setq-local visual-fill-column-center-text t)
    401     (visual-fill-column-mode 1)))
    402 
    403 (defun lexic-mode-reinit ()
    404   "Re-initialize buffer.
    405 Hide all entrys but the first one and goto
    406 the beginning of the buffer."
    407   (ignore-errors
    408     (setq buffer-read-only nil)
    409     (lexic-parse-failed)
    410     (setq buffer-read-only t)
    411 
    412     (let* ((window (get-buffer-window (lexic-get-buffer)))
    413            (win-height (window-height window))
    414            (content-height (count-lines (point-min) (point-max))))
    415       (when (> 0.5 (/ (float win-height) content-height))
    416         (outline-hide-sublevels 3)))
    417 
    418     (goto-char (point-min))
    419     (search-forward "\u200B\u200B")
    420     (left-char 1)))
    421 
    422 (defun lexic-parse-failed ()
    423   "Determine if the search failed, and if so parse the failure."
    424   (goto-char (point-min))
    425   (let (save-word)
    426     (while (re-search-forward "^[0-9]+).*-->\\(.*\\)$" nil t)
    427       (let ((cur-word (match-string-no-properties 1)))
    428         (unless (string= save-word cur-word)
    429           (setq save-word cur-word)
    430           (re-search-backward "^\\(.\\)" nil t)
    431           (insert (format "\n==>%s\n" save-word)))))))
    432 
    433 (defun lexic-expand-entry ()
    434   "Show the children of the current entry, or subtree if there are none."
    435   (outline-show-children)
    436   (when ; no children
    437       (<= 0 (- (save-excursion (outline-next-heading) (point))
    438                (save-excursion (outline-end-of-subtree) (point))))
    439     (outline-show-subtree)))
    440 
    441 (defun lexic-next-entry (&optional linear)
    442   "Move to the next entry, targeting the same level unless LINEAR is set."
    443   (interactive)
    444   (when (< 1 (lexic-outline-level))
    445     (outline-hide-subtree))
    446   (if linear
    447       (outline-next-heading)
    448     (condition-case nil
    449         (outline-forward-same-level 1)
    450       (error
    451        (condition-case nil
    452            (progn
    453              (outline-up-heading 1 t)
    454              (outline-forward-same-level 1))
    455          (error (progn (outline-next-heading)
    456                        (lexic-expand-entry)))))))
    457   (lexic-expand-entry)
    458   (recenter-top-bottom 1)
    459   (message "%s" (lexic-get-outline-path)))
    460 
    461 (defun lexic-previous-entry (&optional linear)
    462   "Move to the previous entry, targeting the same level unless LINEAR is set."
    463   (interactive)
    464   (outline-hide-subtree)
    465   (if (= 2 (line-number-at-pos))
    466       (recenter-top-bottom -1)
    467     (if linear
    468         (outline-previous-heading)
    469       (condition-case nil
    470           (outline-backward-same-level 1)
    471         (error
    472          (condition-case nil
    473              (outline-up-heading 1 t)
    474            (error (outline-previous-heading))))))
    475     (lexic-expand-entry)
    476     (recenter-top-bottom 2))
    477   (message "%s" (lexic-get-outline-path)))
    478 
    479 (defun lexic-toggle-entry ()
    480   "Toggle the folding of the lexic entry point currently lies in."
    481   (interactive)
    482   (save-excursion
    483     (outline-back-to-heading)
    484     (if (not (save-excursion
    485                (outline-end-of-heading)
    486                (outline-invisible-p (line-end-position))))
    487         (outline-hide-subtree)
    488       (outline-show-subtree))))
    489 
    490 ;;; ==================================================================
    491 ;;; Support for lexic process in background
    492 (defun lexic-do-lookup (word &optional raw-p)
    493   "Send the WORD to the lexic process and return the result.
    494 Optional argument RAW-P signals whether the result should be formatted or not."
    495   (let ((process (lexic-get-process)))
    496     (process-send-string process (concat word "\n"))
    497     (with-current-buffer (process-buffer process)
    498       (let ((i 0) result done)
    499         (while (and (not done)
    500                     (< i lexic-wait-timeout))
    501           (when (lexic-match-tail lexic-word-prompts)
    502             (setq result (buffer-substring-no-properties (point-min)
    503                                                          (point-max)))
    504             (setq done t))
    505           (when (lexic-match-tail lexic-choice-prompts)
    506             (process-send-string process "-1\n"))
    507           (unless done
    508             (sleep-for lexic-wait-interval)
    509             (setq i (+ i lexic-wait-interval))))
    510         (unless (< i lexic-wait-timeout)
    511           ;; timeout
    512           (kill-process process)
    513           (error "ERROR: timeout waiting for lexic"))
    514         (erase-buffer)
    515         (if raw-p result
    516           (lexic-format-result result))))))
    517 
    518 (defun lexic-oneshot-lookup (word &optional raw-p args)
    519   "Use a oneshot stcv process just to look up WORD, with ARGS.
    520 Optional argument RAW-P signals whether the result should be formatted or not."
    521   (let ((result (with-temp-buffer
    522                   (apply #'call-process lexic-program-path nil t nil
    523                          (append '("-n") args (list word)))
    524                   (buffer-string))))
    525     (if raw-p result
    526       (lexic-format-result result))))
    527 
    528 (defun lexic-get-process ()
    529   "Get or create the lexic process."
    530   (let ((process (get-process lexic-process-name)))
    531     (unless process
    532       (with-current-buffer (get-buffer-create
    533                             lexic-process-buffer-name)
    534         (erase-buffer)
    535         (setq process (apply #'start-process
    536                              lexic-process-name
    537                              lexic-process-buffer-name
    538                              lexic-program-path
    539                              (lexic-generate-dictionary-argument)))
    540         (set-process-query-on-exit-flag process nil)
    541         ;; kill the initial prompt
    542         (let ((i 0))
    543           (message "starting lexic...")
    544           (while (and (not (lexic-match-tail lexic-word-prompts))
    545                       (< i lexic-wait-timeout))
    546             (sit-for lexic-wait-interval t)
    547             (setq i (+ i lexic-wait-interval)))
    548           (unless (< i lexic-wait-timeout)
    549             ;; timeout
    550             (kill-process process)
    551             (error "ERROR: timeout waiting for lexic"))
    552           (erase-buffer))
    553         (message "")))
    554     process))
    555 
    556 (defun lexic-buffer-tail (length)
    557   "Get a substring of length LENGTH at the end of current buffer."
    558   (let ((beg (- (point-max) length))
    559         (end (point-max)))
    560     (if (< beg (point-min))
    561         (setq beg (point-min)))
    562     (buffer-substring-no-properties beg end)))
    563 
    564 (defun lexic-match-tail (prompts)
    565   "Look for a sdcv prompt from PROMPTS in the tail of the current buffer.
    566 Remove it and return t if found. Return nil otherwise."
    567   (let ((done nil)
    568         (prompt nil))
    569     (while (and (not done)
    570                 prompts)
    571       (setq prompt (car prompts))
    572       (setq prompts (cdr prompts))
    573       (when (string-equal prompt
    574                           (lexic-buffer-tail (length prompt)))
    575         (delete-region (- (point-max) (length prompt))
    576                        (point-max))
    577         (setq done t)))
    578     done))
    579 
    580 ;;;;##################################################################
    581 ;;;;  Output Processing
    582 ;;;;##################################################################
    583 
    584 (defun lexic-format-result (result)
    585   "For a RESULT from lexic, test for failure and format accordingly.
    586 Entries are sorted by their :priority in `lexic-dictionary-specs' then formatted
    587 by `lexic-format-result' in successful case, `cases-format-failure' otherwise."
    588   (cond
    589    ((string-match-p "^Nothing similar to" result)
    590     (lexic-consider-no-results))
    591    ((lexic-failed-p result)
    592     (lexic-format-failure result))
    593    (t
    594     (let* ((entries
    595             (sort (lexic-parse-results result)
    596                   (lambda (a b)
    597                     (< (or (lexic-dictionary-spec (plist-get a :dict) :priority) 1)
    598                        (or (lexic-dictionary-spec (plist-get b :dict) :priority) 1)))))
    599            (word (save-match-data
    600                    (string-match "\\`Found.* similar to \\(\\w+\\)\\." result)
    601                    (downcase (match-string 1 result)))))
    602       (concat
    603        "\u200B"
    604        (propertize (capitalize word) 'face 'outline-1)
    605        "\u2008"
    606        (apply #'concat
    607               (mapcar (lambda (e)
    608                         (lexic-format-entry
    609                          e word))
    610                       entries)))))))
    611 
    612 (defun lexic-consider-no-results ()
    613   "No results have been found. What should we tell the user?"
    614   (let ((dicts? (not (string-match-p "\\`Dictionary's name +Word count[\n ]+\\'"
    615                                      (shell-command-to-string (concat lexic-program-path " -l"))))))
    616     (if dicts?
    617         (user-error "Couldn't find anything similar to your search, sorry :(")
    618       (user-error "No results found, but you don't seem to have any dictionaries installed! Try %s"
    619                   (propertize "M-x lexic-dictionary-help" 'face 'font-lock-keyword-face)))))
    620 
    621 (defun lexic-parse-results (result)
    622   "Loop through every entry in RESULT and parse each one.
    623 Returns a list of plists with keys :word, :dict, and :info."
    624   (let (entries latest-match last-match dict word)
    625     (with-temp-buffer
    626       (insert result)
    627       (goto-char (point-min))
    628       (while
    629           (setq latest-match (re-search-forward
    630                               "-->\\([^\n]+\\)\n-->\\(.+\\)\n\n" nil t))
    631         (when last-match
    632           (forward-line -3)
    633           (setq entries
    634                 (append entries
    635                         `((:word ,word
    636                            :dict ,dict
    637                            :info ,(buffer-substring last-match (point))))))
    638           (forward-line 3))
    639         (setq last-match latest-match)
    640         (setq dict (match-string 1))
    641         (setq word (match-string 2)))
    642       (when last-match
    643         (setq entries
    644               (append entries
    645                       `((:word ,word
    646                          :dict ,dict
    647                          :info ,(buffer-substring last-match (point-max))))))))))
    648 
    649 (defun lexic-failed-p (results)
    650   "Whether the RESULTS match the hardcoded failure pattern."
    651   (if (string-match-p "Found [0-9]+ items, similar to [^.]+\\.\n0)" results) t nil))
    652 
    653 (defun lexic-format-failure (results)
    654   "When lexic failed to match the word, format the suggestions in RESULTS."
    655   (let (suggestions last-match)
    656     (while (setq last-match
    657                  (string-match "^[0-9]+)\\(.*\\)-->\\([A-Za-z]+\\)"
    658                                results
    659                                (when last-match (1+ last-match))))
    660       (let ((dict (match-string 1 results))
    661             (word (match-string 2 results)))
    662         (if (assoc dict suggestions)
    663             (setcdr (assoc dict suggestions)
    664                     (list (append (cadr (assoc dict suggestions)) (list word))))
    665           (setq suggestions (append suggestions `((,dict . ((,word)))))))))
    666     (concat
    667      (propertize
    668       (replace-regexp-in-string
    669        "items" "entries"
    670        (substring results 0 (string-match "\n" results)))
    671       'face 'warning)
    672      "\n"
    673      (mapconcat (lambda (dict-suggestions)
    674                   (format "\u200B\u200B%s\n\u200B\u200B\u200B%s"
    675                           (propertize (or
    676                                        (lexic-dictionary-spec (car dict-suggestions) :short)
    677                                        (car dict-suggestions))
    678                                       'face 'outline-3)
    679                           (propertize
    680                            (mapconcat #'identity (cadr dict-suggestions) "\n\u200B\u200B\u200B")
    681                            'face 'font-lock-keyword-face)))
    682                 (sort suggestions
    683                       (lambda (a b)
    684                         (< (or (lexic-dictionary-spec (car a) :priority) 1)
    685                            (or (lexic-dictionary-spec (car b) :priority) 1))))
    686                 "\n"))))
    687 
    688 (defun lexic-format-entry (entry &optional expected-word)
    689   "Format a given ENTRY, a plist with :word :dict and :info.
    690 If the DICT has a :short value in `lexic-dictionary-specs' that is used as
    691 the display name. Likewise if present, :formatter is used to generate the
    692 entry. EXPECTED-WORD is the word expected in ENTRY."
    693   (let ((dict (plist-get entry :dict)))
    694     (concat
    695      "\n\u200B\u200B"
    696      (propertize (or (lexic-dictionary-spec dict :short)
    697                      dict) 'face 'outline-3)
    698      "\n\u2008\n"
    699      (if-let* ((formatter (lexic-dictionary-spec dict :formatter)))
    700          (let ((case-fold-search nil))
    701            (string-trim (funcall formatter entry expected-word)))
    702        (plist-get entry :info))
    703      "\n")))
    704 
    705 (defun lexic-get-outline-path ()
    706   "Return a string giving the structural path to the current position."
    707   (let ((outline-path "")
    708         (last-pos 0)
    709         outline-level-current substring level-regexp)
    710     (save-excursion
    711       (outline-back-to-heading)
    712       (setq outline-level-current (lexic-outline-level))
    713       (while (/= (point) last-pos)
    714         (setq outline-level-current (lexic-outline-level))
    715         (setq substring
    716               (buffer-substring
    717                (point)
    718                (save-excursion (search-forward "\u2008") (point))))
    719         (setq level-regexp
    720               (pcase outline-level-current
    721                 (1 "^\\([^\n]+\\)")
    722                 (2 "^\\([^ \n]+\\)")
    723                 (3 "^\u200B\u200B*\\([^,]+\\(?:, [ &.;a-z]+\\)?\\)")
    724                 (4 "\\([0-9]+\\.\\(  ?([^)]+)\\)?\\( \\w+\\)\\{0,4\\}\\)")
    725                 (5 "\\(([a-z])\\(  ?([^)]+)\\)?\\( \\w+\\)\\{0,4\\}\\)")
    726                 (_ "^\u200B\u200B*\\([^ ]+\\)")))
    727         (setq outline-path
    728               (concat
    729                (propertize " → " 'face 'bold)
    730                (save-match-data
    731                  (string-match level-regexp substring)
    732                  (match-string 1 substring))
    733                outline-path))
    734         (setq last-pos (point))
    735         (ignore-errors
    736           (outline-up-heading 1)))
    737       (substring outline-path 2))))
    738 
    739 (defun lexic-outline-level ()
    740   "It seems that while (outline-level) should work, it has issues."
    741   (- (save-excursion (outline-back-to-heading)
    742                      (search-forward-regexp "\u200B+"))
    743      (point)))
    744 
    745 (defvar lexic-dictionary-specs
    746   '(("Webster's Revised Unabridged Dictionary (1913)"
    747      :formatter lexic-format-webster
    748      :priority 1)
    749     ("Elements database"
    750      :short "Element"
    751      :formatter lexic-format-element
    752      :priority 2)
    753     ("Hitchcock's Bible Names Dictionary"
    754      :short "Hitcchcock's Bible Names"
    755      :priority 3)
    756     ("Online Etymology Dictionary"
    757      :short "Etymology"
    758      :formatter lexic-format-online-etym
    759      :priority 4)
    760     ("Soule's Dictionary of English Synonyms"
    761      :short "Synonyms"
    762      :formatter lexic-format-soule
    763      :priority 5))
    764   "List of dictionary specifications.
    765 In each entry the car is the name according to lexic, and the cdr is
    766 a plist whith the following options:
    767   :short - a (usually) shorter display name for the dictionary
    768   :formatter - a function with signature (ENTRY WORD) that returns a string
    769   :priority - sort priority, defaults to 1")
    770 
    771 (defun lexic-dictionary-spec (dict spec)
    772   "Helper function to get a :SPEC of a given DICT."
    773   (plist-get (cdr (assoc dict lexic-dictionary-specs)) spec))
    774 
    775 (defun lexic-format-webster (entry &optional _expected-word)
    776   "Make a Webster's dictionary ENTRY for WORD look nice.
    777 Designed for Webster's Revised Unabridged Dictionary (1913),as found at
    778 http://download.huzheng.org/dict.org/stardict-dictd-web1913-2.4.2.tar.bz2.
    779 
    780 This should also work nicely with GCIDE."
    781   (thread-last (plist-get entry :info)
    782        (lexic-format-webster-diacritics)
    783        (replace-regexp-in-string ; entry dividors
    784         (format "\n\n\\(%s\\)" (plist-get entry :word))
    785         "\n                     ━━━━━━━━━ ■ ━━━━━━━━━\n\n\\1")
    786        (replace-regexp-in-string ; entry headline
    787         (rx line-start
    788             (group-n 1 ; word
    789                      (any "A-Z")
    790                      (+ (any "a-z")))
    791             (optional " \\" ; word2
    792                       (group-n 2 (+ (not (any "\\"))))
    793                       "\\")
    794             (optional " (" ; pronounciation
    795                       (group-n 3 (+ (not (any ")"))))
    796                       ")")
    797             ", "
    798             (group-n 4 ; part of speech
    799                      (+ (any "A-Z" "a-z" ".;&" " ")))
    800             (optional "[" ; etymology / alternative forms
    801                       (group-n 5
    802                                (+ (or (+ (not (any "][")))
    803                                       (and "[" (+ (not (any "]["))) "]"))))
    804                       "]")
    805             (optional ; definitely etymology
    806              (+ (any "\n" " ")) "["
    807              (group-n 6
    808                       (+ (or (+ (not (any "][")))
    809                              (and "[" (+ (not (any "]["))) "]"))))
    810              "]")
    811             (optional " (" ; category
    812                       (group-n 7 (+ (not (any ")"))))
    813                       ")"))
    814         (lambda (match)
    815           (let* ((word2 (match-string 2 match))
    816                  (pronounciation (match-string 3 match))
    817                  (part-of-speech (lexic-format-expand-abbreviations
    818                                   (replace-regexp-in-string " \\'" ""
    819                                                             (match-string 4 match))))
    820                  (alternative-forms (when (match-string 6 match)
    821                                       (lexic-format-expand-abbreviations (match-string 5 match))))
    822                  (etymology (lexic-format-expand-abbreviations (match-string (if alternative-forms 6 5) match)))
    823                  (category (lexic-format-expand-abbreviations (match-string 7 match)))
    824                  (last-newline (lambda (text) (- (length text)
    825                                             (or (save-match-data
    826                                                   (string-match "\n[^\n]*\\'" text)) 0)))))
    827             (concat
    828              "\u200B\u200B\u200B"
    829              (propertize word2
    830                          'face 'bold)
    831              (when pronounciation
    832                (propertize (format " %s" pronounciation)
    833                            'face 'font-lock-type-face))
    834              ", "
    835              (propertize part-of-speech
    836                          'face '(bold font-lock-keyword-face))
    837              (when alternative-forms
    838                (setq alternative-forms
    839                      (lexic-format-reflow-text
    840                       (format " [%s]" alternative-forms)
    841                       80 10
    842                       (+ 3 (if pronounciation 1 0)
    843                          (funcall last-newline
    844                                   (concat word2 pronounciation part-of-speech)))
    845                       "   "))
    846                (propertize alternative-forms
    847                            'face 'diff-context))
    848              (when etymology
    849                (setq etymology
    850                      (lexic-format-reflow-text
    851                       (format " [%s]" etymology)
    852                       80 10
    853                       (+ 3 (if pronounciation 1 0)
    854                          (funcall last-newline
    855                                   (concat word2 pronounciation part-of-speech alternative-forms)))
    856                       "   "))
    857                (propertize etymology
    858                            'face 'font-lock-comment-face))
    859              (when category
    860                (propertize (format " (%s)" category)
    861                            'face 'font-lock-constant-face))
    862              "\u2008"))))
    863        (replace-regexp-in-string ; categorised terms
    864         "{\\([^}]+?\\)}\\(.?\\) (\\([^)]+?\\))"
    865         (lambda (match)
    866           (let ((term (match-string 1 match))
    867                 (punct (match-string 2 match))
    868                 (category (match-string 3 match)))
    869             (concat
    870              (propertize term 'face 'font-lock-keyword-face)
    871              punct
    872              (propertize (format " (%s)"
    873                                  (if lexic-expand-abbreviations
    874                                      (lexic-format-expand-abbreviations category)
    875                                    category))
    876                          'face 'font-lock-constant-face)))))
    877        (replace-regexp-in-string ; other terms
    878         "{\\([^}]+?\\)}"
    879         (lambda (match)
    880           (let ((term (match-string 1 match)))
    881             (concat
    882              (propertize term 'face 'font-lock-keyword-face)))))
    883        (replace-regexp-in-string ; quotations
    884         "^\n            +\\(\\w[[:ascii:]]+?\\)\\(\n? *--[A-Za-z0-9. ]+\n? *[A-Za-z0-9. ]*\\)"
    885         (lambda (match)
    886           (let ((body (match-string 1 match))
    887                 (author (match-string 2 match)))
    888             (concat
    889              "\n           "
    890              (propertize (format "❝%s❞" body)
    891                          'face 'font-lock-doc-face)
    892              author "\n"))))
    893        (replace-regexp-in-string ; attributions
    894         " --\\([A-Z][A-Za-z. ]+\n? *[A-Za-z0-9. ]*\\)"
    895         (lambda (match)
    896           (propertize (concat " ──" (match-string 1 match))
    897                       'face '(italic font-lock-type-face))))
    898        (replace-regexp-in-string ; inline quotations (1)
    899         "``" "“")
    900        (replace-regexp-in-string ; inline quotations (1)
    901         "''" "”")
    902        (replace-regexp-in-string ; em dash approximation
    903         " -- " " ─── ")
    904        (replace-regexp-in-string ; lists
    905         "   \\(?:\\([0-9]+\\.\\)\\|\\(   ([a-z])\\)\\) \\(?: ?(\\([^)]+\\)) \\)?\\(.*\\)"
    906         (lambda (match)
    907           (let ((number (match-string 1 match))
    908                 (letter (match-string 2 match))
    909                 (category (match-string 3 match))
    910                 (rest-of-line (match-string 4 match)))
    911             (concat
    912              (when letter "\u200B")
    913              "\u200B\u200B\u200B\u200B   "
    914              (when number
    915                (propertize number 'face '(bold font-lock-string-face)))
    916              (when letter
    917                (propertize letter 'face 'font-lock-string-face))
    918              (when category
    919                (propertize (format " (%s)"
    920                                    (if lexic-expand-abbreviations
    921                                        (lexic-format-expand-abbreviations category)
    922                                      category))
    923                            'face 'font-lock-constant-face))
    924              " "
    925              rest-of-line
    926              "\u2008"))))
    927        (replace-regexp-in-string ; note
    928         "  Note: "
    929         (concat "     "
    930                 (propertize " " 'display '(space . (:width 0.55)))
    931                 (propertize "☞" 'face 'font-lock-function-name-face)
    932                 " "))
    933        (replace-regexp-in-string ; subheadings
    934         "  \\(\\w+\\): "
    935         (lambda (match)
    936           (propertize  (concat "  "(match-string 1 match) ": ")
    937                        'face 'bold)))))
    938 
    939 (defun lexic-format-expand-abbreviations (content &optional force)
    940   "Expand certain standard abbreviations in CONTENT when `lexic-expand-abbreviations' or FORCE are non-nil."
    941   (when content
    942     (when (or lexic-expand-abbreviations force)
    943       (let ((abbreviations
    944              '(; A
    945                ("adj"                     "adjective")
    946                ("a"                       "adjective")
    947                ("abbrev"                  "abbreviated")
    948                ("abl"                     "ablative")
    949                ("Abp"                     "Archbishop")
    950                ("acc"                     "Acoustics")
    951                ("act"                     "active")
    952                ("adv"                     "adverb")
    953                ("Agric"                   "Agriculture")
    954                ("Alban"                   "Albanian")
    955                ("Alg"                     "Algebra")
    956                ("Am"                      "America")
    957                ("Amer"                    "American")
    958                ("Am"                      "Amos")
    959                ("Am\\. Cyc"               "Appleton's American Cyclopedia")
    960                ("Anal. Geom"              "Analytical Geometry")
    961                ("Anat"                    "Anatomy")
    962                ("Anc"                     "Ancient")
    963                ("Angl\\. Ch"              "Anglican Church")
    964                ("aor"                     "aorist")
    965                ("Ar"                      "Arabic")
    966                ("Arch"                    "Architecture")
    967                ("Arch\\. Pub\\. Soc"      "Architectural Pub. Society")
    968                ("Arith"                   "Arithmetic")
    969                ("Arm\\., Armor"           "Armorican")
    970                ("AS"                      "Anglo-Saxon")
    971                ("Astrol"                  "Astrology")
    972                ("Astron"                  "Astronomy")
    973                ("aug"                     "augmentative")
    974                ;; B
    975                ("Bank"                    "Banking")
    976                ("Beau\\. & Fl"            "Beaumont & Fletcher")
    977                ("B\\. & Fl"               "Beaumont & Fletcher")
    978                ("Bib\\. Sacra"            "Bibliotheca Sacra")
    979                ("Bib"                     "Biblical")
    980                ("Bibliog"                 "Bibliography")
    981                ("Biol"                    "Biology")
    982                ("Bisc"                    "Biscayan")
    983                ("B\\. Jon"                "Ben Jonson")
    984                ("Bk\\. of Com\\. Prayer " "Book of Common Prayer")
    985                ("Blackw\\. Mag"           "Blackwood's Magazine")
    986                ("Bohem"                   "Bohemian")
    987                ("Bot"                     "Botany")
    988                ("Bp"                      "Bishop")
    989                ("Brande & C"              "Brande & Cox")
    990                ("Braz"                    "Brazilian")
    991                ("Brit\\. Critic"          "British Critic")
    992                ("Brit\\. Quar\\. Rev"     "British Quarterly Review")
    993                ("Burl"                    "Burlesque")
    994                ;; C
    995                ("C"                       "Centigrade")
    996                ("Cant"                    "Canticles")
    997                ("Carp"                    "Carpentry")
    998                ("Catal"                   "Catalan")
    999                ("Cath\\. Dict"            "Catholic Dictionary")
   1000                ("Celt"                    "Celtic")
   1001                ("cf"                      "confer")
   1002                ("Cf"                      "Confer")
   1003                ("Ch"                      "Church")
   1004                ("Chald"                   "Chaldee")
   1005                ("Chem"                    "Chemistry")
   1006                ("Ch\\. Hist"              "Church History")
   1007                ("Chron"                   "Chronology, Chronicles")
   1008                ("Civ"                     "Civil")
   1009                ("Class"                   "Classical")
   1010                ("Class\\. Myth"           "Classical Mythology")
   1011                ("Col"                     "Colossians")
   1012                ("colloq\\., coll"         "colloquial, colloquially")
   1013                ("Com"                     "Commerce, Common")
   1014                ("comp"                    "compound, compounded, composition")
   1015                ("compar"                  "comparative")
   1016                ("conj"                    "conjunction")
   1017                ("Con\\. Sect"             "Conic Sections")
   1018                ("contr"                   "contracted, contraction")
   1019                ("Copt"                    "Coptic")
   1020                ("Corn"                    "Cornish")
   1021                ("corrupt"                 "corrupted, corruption")
   1022                ("Cotgr"                   "Cotgrave")
   1023                ("Cyc\\. Med"              "Cyclopedia of Practical Medicine")
   1024                ("Crim\\. Law"             "Criminal Law")
   1025                ("Crystallog"              "Crystallography")
   1026                ("Cyc"                     "Cyclopedia")
   1027                ;; D
   1028                ("D"                       "Dutch (sometimes Daniel)")
   1029                ("Dan"                     "Danish")
   1030                ("dat"                     "dative")
   1031                ("def"                     "definitions")
   1032                ("Deut"                    "Deuteronomy")
   1033                ("Dial"                    "Dialectic")
   1034                ("dim"                     "diminutive")
   1035                ("Diosc"                   "dioscorides")
   1036                ("Disp"                    "Dispensatory")
   1037                ("Disus"                   "Disused")
   1038                ("Dom\\. Econ"             "Domestic Economy")
   1039                ("Dublin Univ\\. Mag"      "Dublin University Magazine")
   1040                ("Dyn"                     "Dynamics")
   1041                ;; E
   1042                ("E"                       "English")
   1043                ("Eccl"                    "Ecclesiastical, Ecclesiastes")
   1044                ("Eccl\\. Hist"            "Ecclesiastical History")
   1045                ("Ecclus"                  "Ecclesiasticus")
   1046                ("Eclec\\. Rev"            "Eclectic Review")
   1047                ("Ed\\. Rev"               "Edinburgh Review")
   1048                ;; ("e\\. g"                  "exempli gratia (for example)")
   1049                ("Egypt"                   "Egyptian")
   1050                ("Elect"                   "Electricity")
   1051                ("Elec"                    "Electrical")
   1052                ("emph"                    "emphatic")
   1053                ("Encyc\\. Amer"           "Encyclopedia Americana")
   1054                ("Encyc\\. Crit"           "Encyclopedia Britannica")
   1055                ("Encyc\\. Dict"           "Hunter's Encyclopedic Dictionary")
   1056                ("Encyc"                   "Encyclopedia")
   1057                ("Eng\\. Cyc"              "English Cyclopedia")
   1058                ("Eng"                     "English")
   1059                ("Engin"                   "Engineering")
   1060                ("Eol"                     "Eolic")
   1061                ("Eph\\., Ephes"           "Ephesians")
   1062                ("equiv"                   "equivalent")
   1063                ("Esd"                     "Esdras")
   1064                ("esp"                     "especially")
   1065                ("Etch\\. & Eng"           "Etching & Engraving")
   1066                ("Ethnol"                  "Ethnology")
   1067                ("etym\\., etymol"         "etymology")
   1068                ("Ex\\., Exod"             "Exodus")
   1069                ("Ezek"                    "Ezekiel")
   1070                ;; F
   1071                ("F"                       "French")
   1072                ("f"                       "feminine")
   1073                ("fem"                     "feminine")
   1074                ("Fahr"                    "Fahrenheit")
   1075                ("Far"                     "Farriery")
   1076                ("Feud"                    "Feudal")
   1077                ("Fig"                     "Figurative, figuratively")
   1078                ("Fin"                     "Finnish")
   1079                ("For\\. Quart\\. Rev"     "Foreign Quarterly Review")
   1080                ("Fort"                    "Fortification")
   1081                ("Fr"                      "French")
   1082                ("fr"                      "from")
   1083                ("freq"                    "frequentative")
   1084                ("Fries"                   "Friesic")
   1085                ("fut"                     "future")
   1086                ;; G
   1087                ("G"                       "German")
   1088                ("Gael"                    "Gaelic")
   1089                ("Gal"                     "Galen")
   1090                ("Gal"                     "Galatians")
   1091                ("Galv"                    "Galvanism")
   1092                ("gen"                     "generally, genitive")
   1093                ("Geneal"                  "Genealogy")
   1094                ("Gent\\. Mag"             "Gentleman's Magazine")
   1095                ("Geog"                    "Geography")
   1096                ("Geol"                    "Geology")
   1097                ("Geom"                    "Geometry")
   1098                ("Ger"                     "Germanic or German")
   1099                ("Gk"                      "Greek")
   1100                ("Goth"                    "Gothic")
   1101                ("Gov\\. of Tongue"        "Government of the Tongue")
   1102                ("Gr"                      "Greek")
   1103                ("Gram"                    "Grammar")
   1104                ("Gris"                    "Grisons")
   1105                ("Gun"                     "Gunnery")
   1106                ;; H
   1107                ("H"                       "High")
   1108                ("Hab"                     "Habakkuk")
   1109                ("Hag"                     "Haggai")
   1110                ("Ham\\. Nav\\. Encyc"     "Hamersly's Naval Encyclopedia")
   1111                ("Heb"                     "Hebrew")
   1112                ("Her"                     "Heraldry")
   1113                ("Hind"                    "Hindostanee")
   1114                ("Hipp"                    "Hippocrates")
   1115                ("Hist"                    "History")
   1116                ("Horol"                   "Horology")
   1117                ("Hort"                    "Horticulture")
   1118                ("Hung"                    "Hungarian")
   1119                ("Hydraul"                 "Hydraulics")
   1120                ("Hydros"                  "Hydrostatics")
   1121                ("hypoth"                  "hypothetical")
   1122                ;; I
   1123                ("Icel"                    "Icelandic")
   1124                ;; ("i\\. e"                  "id est (that is)")
   1125                ("Illust"                  "Illustration, Illustrated")
   1126                ("imp"                     "imperfect")
   1127                ("Imp\\. Dict"             "Imperial Dictionary")
   1128                ("incho"                   "inchoative")
   1129                ("ind"                     "indicative")
   1130                ("indef"                   "indefinite")
   1131                ("inf"                     "infinitive")
   1132                ("intens"                  "intensive")
   1133                ("interj"                  "interjection")
   1134                ("Internat\\. Cyc"         "International Cyclopeia")
   1135                ("Ion"                     "Ionic")
   1136                ("i\\. q"                  "idem quod")
   1137                ("Ir"                      "Irish")
   1138                ("Is"                      "Isaiah")
   1139                ("Isa"                     "Isaiah")
   1140                ("It"                      "Italian")
   1141                ;; J
   1142                ("Jap"                     "Japanese")
   1143                ("Jas"                     "James")
   1144                ("Jav"                     "Javanese")
   1145                ("Jer"                     "Jeremiah")
   1146                ("Join"                    "Joinery")
   1147                ("Josh"                    "Joshua")
   1148                ("Judg"                    "Judges")
   1149                ;; K
   1150                ("K"                       "Kings")
   1151                ;; L
   1152                ("L"                       "Latin")
   1153                ("Lam"                     "Lamentations")
   1154                ("Lapp"                    "Lappish")
   1155                ("Lat"                     "Latin")
   1156                ("LD"                      "Low Dutch")
   1157                ("Lett"                    "Lettish")
   1158                ("Lev"                     "Leviticus")
   1159                ("LG"                      "Low German")
   1160                ("LGr"                     "Low Greek")
   1161                ("Linn"                    "Linnæus")
   1162                ("Lit"                     "Literally")
   1163                ("lit"                     "literally")
   1164                ("Lit"                     "Literature")
   1165                ("Lith"                    "Lithuanian")
   1166                ("LL"                      "Late Latin")
   1167                ;; M
   1168                ("M"                       "Middle")
   1169                ("m"                       "masculine")
   1170                ("masc"                    "masculine")
   1171                ("Maced"                   "Macedonian")
   1172                ("Mach"                    "Machinery")
   1173                ("Mad"                     "Madam")
   1174                ("Mag"                     "Magazine")
   1175                ("Mal"                     "Malachi")
   1176                ("Malay"                   "Malayan")
   1177                ("Man"                     "Manège")
   1178                ("Manuf"                   "Manufacturing")
   1179                ("Mar"                     "Maritime")
   1180                ("Math"                    "Mathematics")
   1181                ("Matt"                    "Matthew")
   1182                ("ME"                      "Middle English")
   1183                ("Mech"                    "Mechanic")
   1184                ("Med"                     "Medicine")
   1185                ("Metal"                   "Metallurgy")
   1186                ("Metaph"                  "Metaphysics")
   1187                ("Meteor"                  "Meteorolgy")
   1188                ("mgr"                     "milligrams")
   1189                ("MHG"                     "Middle High German")
   1190                ("Micros"                  "Microscopy")
   1191                ("Mil"                     "Military")
   1192                ("Min"                     "Mineralogy")
   1193                ("Mir\\. for Mag"          "Mirror for Magistrates")
   1194                ("MLG"                     "Middle Low German")
   1195                ("Moham"                   "Mohammedan")
   1196                ("Mozley & W"              "Mozley & Whiteley")
   1197                ("Mus"                     "Music")
   1198                ("Myst"                    "Mysteries")
   1199                ("Myth"                    "Mythology")
   1200                ;; N
   1201                ("Nat\\. Hist"             "Natural History")
   1202                ("Nat\\. ord"              "Natural order")
   1203                ("Naut"                    "Nautical")
   1204                ("Nav"                     "Navy")
   1205                ("Navig"                   "Navigation")
   1206                ("N\\. Brit\\. Rev"        "North British Review")
   1207                ("Neh"                     "Nehemiah")
   1208                ("neut"                    "neuter")
   1209                ("New Am\\. Cyc"           "New American Cyclopedia")
   1210                ("New Month\\. Mag"        "New Monthly Magazine")
   1211                ("NF"                      "New French")
   1212                ("NGr"                     "Mew Greek")
   1213                ("NHeb"                    "New Hebrew")
   1214                ("NL"                      "New Latin")
   1215                ("nom"                     "nominative")
   1216                ("Norm\\. F"               "Norman French")
   1217                ("North Am\\. Rev"         "North American Review")
   1218                ("Norw"                    "Norwegian")
   1219                ("Num"                     "Numbers")
   1220                ("Numis"                   "Numismatics")
   1221                ("N"                       "New")
   1222                ;; O
   1223                ("O"                       "Old")
   1224                ("Ob"                      "Obadiah")
   1225                ("obs"                     "obsolete")
   1226                ("Obsoles"                 "Obsolescent")
   1227                ("OCelt"                   "Old Celtic")
   1228                ("OD"                      "Old Dutch")
   1229                ("ODan"                    "Old Danish")
   1230                ("OE"                      "Old English")
   1231                ("OF"                      "Old French")
   1232                ("OFelm"                   "Old Flemish")
   1233                ("OFris"                   "Old Frisian")
   1234                ("OFries"                  "Old Friesic")
   1235                ("OGael"                   "Old Gaelic")
   1236                ("OGr"                     "Old Greek")
   1237                ("OHG"                     "Old High German")
   1238                ("OIcel"                   "Old Icelandic")
   1239                ("OIt"                     "Old Italian")
   1240                ("OL"                      "Old Latin")
   1241                ("OIr"                     "Old Irish")
   1242                ("ON"                      "Old Norse")
   1243                ("OLG"                     "Old Low German")
   1244                ("OPer"                    "Old Persian")
   1245                ("OPg"                     "Old Portuguese")
   1246                ("OPol"                    "Old Polish")
   1247                ("Opt"                     "Optics")
   1248                ("orig"                    "original")
   1249                ("Ornith"                  "Ornithology")
   1250                ("OS"                      "Old Saxon")
   1251                ("OSlav"                   "Old Slavic")
   1252                ("OSp"                     "Old Spanish")
   1253                ("Oxf\\. Gloss"            "Oxford Glossary of Architecture")
   1254                ;; P
   1255                ("p\\.[\n ]*a"             "participial adjective")
   1256                ("Paint"                   "Painting")
   1257                ("Paleon"                  "Paleontology")
   1258                ("pass"                    "passive")
   1259                ("Pathol"                  "Pathology")
   1260                ("P\\. Cyc"                "Penny Cyclopedia")
   1261                ("Per"                     "Persian")
   1262                ("perh"                    "perhaps")
   1263                ("pers"                    "person")
   1264                ("Persp"                   "Perspective")
   1265                ("Pert"                    "Pertaining")
   1266                ("Peruv"                   "Peruvian")
   1267                ("Pet"                     "Peter")
   1268                ("Pg"                      "Portuguese")
   1269                ("Pharm"                   "Pharmacy, Pharmacopœia")
   1270                ("Phil"                    "Phillipians")
   1271                ("Philem"                  "Philemon")
   1272                ("Philol"                  "Philology")
   1273                ("Philos"                  "Philosophy")
   1274                ("Phon"                    "Phonetics")
   1275                ("Photog"                  "Photography")
   1276                ("Photom"                  "Photometry")
   1277                ("Phren"                   "Phrenology")
   1278                ("Phys"                    "Physics")
   1279                ("Phys\\. Sci"             "Physical Science")
   1280                ("Physiol"                 "Physiology")
   1281                ("pl"                      "plural")
   1282                ("Poet"                    "Poetry, Poetical")
   1283                ("Pol"                     "Polish")
   1284                ("Pol\\. Econ"             "Political Economy")
   1285                ("Polit\\. Econ"           "Political Economy")
   1286                ("Pop\\. Sci\\. Monthly"   "Polular Science Monthly")
   1287                ("Poss"                    "Possessive")
   1288                ("pp"                      "pages")
   1289                ("P\\. Plowman"            "Piers Plowman")
   1290                ("p\\.[\n ]*p"             "past participle")
   1291                ("p\\.[\n ]*pr"            "present participle")
   1292                ("p\\.[\n ]*ple"           "present participle")
   1293                ("Pr"                      "Provençal")
   1294                ("Pref"                    "Preface")
   1295                ("pref"                    "prefix")
   1296                ("prep"                    "preposition")
   1297                ("pres"                    "present")
   1298                ("pret"                    "preterit")
   1299                ("prin"                    "principally")
   1300                ("Print"                   "Printing")
   1301                ("priv"                    "privative")
   1302                ("prob"                    "probably")
   1303                ("pron"                    "pronoun")
   1304                ("prop"                    "properly")
   1305                ("Pros"                    "Prosody")
   1306                ("prov"                    "provincial")
   1307                ("Prov"                    "Provincial")
   1308                ("Prov"                    "Proverbs")
   1309                ("Ps\\., Psa"              "Psalms")
   1310                ("Pyro\\.-elect"           "Pyro-electricity")
   1311                ("p"                       "participle")
   1312                ;; Q
   1313                ("Quart\\. Rev"            "Quarterly Review")
   1314                ("q\\. v"                  "quod vide (which see)")
   1315                ;; R
   1316                ("R\\. C"                  "Roman Catholic")
   1317                ("R\\. C\\. Ch"            "Roman Catholic Church")
   1318                ("Rep\\. Sec\\. of War"    "Report of Secretary of War")
   1319                ("Rev"                     "Revelation")
   1320                ("Rev"                     "Review")
   1321                ("Rev\\. Ver"              "Revised Version (of the Bible)")
   1322                ("Rhet"                    "Rhetoric")
   1323                ("R\\. of Brunne"          "Robert of Brunne")
   1324                ("R\\. of Gl"              "Robert of Gloucester")
   1325                ("Rom"                     "Roman, Romans")
   1326                ("Rom\\. Cath"             "Roman Catholic")
   1327                ("Rom\\. of R"             "Romaunt of the Rose")
   1328                ("Rpts"                    "reports")
   1329                ("Russ"                    "Russian")
   1330                ("R"                       "Rare")
   1331                ;; S
   1332                ("Sam"                     "Samaritan")
   1333                ("Sam"                     "Samuel")
   1334                ("Sat\\. Rev"              "Saturday Review")
   1335                ("Sax"                     "Saxon")
   1336                ("sc"                      "scilicet (being understood)")
   1337                ("Scand"                   "Scandinavian")
   1338                ("Sci"                     "Science")
   1339                ("Sci\\. Am"               "Scientific American")
   1340                ("Scot"                    "Scotland, Scottish")
   1341                ("Script"                  "Scripture, Scriptural")
   1342                ("Sculp"                   "Sculpture")
   1343                ("Serb"                    "Serbian")
   1344                ("Serv"                    "Servian")
   1345                ("Shak"                    "Shakespeare")
   1346                ("sing"                    "singular")
   1347                ("Skr"                     "Sanskrit")
   1348                ("Slav"                    "Slavonic")
   1349                ("Sp"                      "Spanish")
   1350                ("Specif"                  "Specifically")
   1351                ("Stat"                    "Statuary")
   1352                ("subj"                    "subjunctive")
   1353                ("superl"                  "superlative")
   1354                ("Surg"                    "Surgery")
   1355                ("Surv"                    "Surveying")
   1356                ("Sw"                      "Swedish")
   1357                ("Syd\\. Soc\\. Lex"       "Sydenham Society Lexicon")
   1358                ("Syn"                     "Synonyms")
   1359                ("Synop"                   "Synopsis")
   1360                ("Syr"                     "Syriac")
   1361                ;; T
   1362                ("Tart"                    "Tartaric")
   1363                ("Teleg"                   "Telegraphy")
   1364                ("term"                    "termination")
   1365                ("Test"                    "Testament")
   1366                ("Theol"                   "Theology")
   1367                ("Thes"                    "Thessalonians")
   1368                ("Tim"                     "Timothy")
   1369                ("Todd & B"                "Todd & Bowman")
   1370                ("Trans"                   "Translation")
   1371                ("Treas"                   "Treasury")
   1372                ("Trig"                    "Trigonometry")
   1373                ("Turk"                    "Turkish")
   1374                ("Typog"                   "Typography")
   1375                ;; U
   1376                ("Univ"                    "University")
   1377                ("Up"                      "Upper")
   1378                ("U\\. ?S"                 "United States")
   1379                ("U\\. ?S\\. Disp"         "United States Dispensatory")
   1380                ("U\\. ?S\\. Pharm"        "United States Pharmacopœia")
   1381                ("U\\. ?S\\. Int\\. Rev\\. Statutes" "United States Internal Revenue Statutes")
   1382                ("usu"                     "usually")
   1383                ;; V
   1384                ("v\\.[\n ]*i"             "intransitive verb")
   1385                ("v\\.[\n ]*t"             "transitive verb")
   1386                ("var"                     "variety")
   1387                ("vb\\.[\n ]*n"            "verbal noun")
   1388                ("Veter"                   "Veterinary")
   1389                ("Vitr"                    "Vitruvius")
   1390                ;; W
   1391                ("W"                       "Welsh")
   1392                ("Wall"                    "Wallachian")
   1393                ("Westm\\. Cat"            "Westminster Catechism")
   1394                ("Westm\\. Rev"            "Westminster Review")
   1395                ;; Z
   1396                ("Zech"                    "Zechariah")
   1397                ("Zeph"                    "Zephaniah")
   1398                ("Zoöl"                    "Zoölogy")
   1399                ;; Reordered for correctness
   1400                ("n"                       "noun")
   1401                ("v"                       "verb"))))
   1402         (dolist (abbrev abbreviations)
   1403           (setq content
   1404                 (replace-regexp-in-string
   1405                  (concat "\\b" (car abbrev) "\\.")
   1406                  (cadr abbrev)
   1407                  content t)))))
   1408     content))
   1409 
   1410 (defun lexic-format-webster-diacritics (pronunciation)
   1411   "Replace ascii pronounciation symbols in PRONUNCIATION with unicode equivalents."
   1412   (let ((diacritics
   1413          '(("[,C]" "Ç")
   1414            ("\"u" "ü") ; uum
   1415            ("'e" "é") ; eacute
   1416            ("\\^a" "â") ; acir
   1417            ("\"a" "ä") ; aum
   1418            ("`a" "à") ; agrave
   1419            ("\\*a" "å") ; aring
   1420            ("\\*u" "ů") ; uring
   1421            (",c" "ç") ; ccedil
   1422            ("cced" "ç")
   1423            ("\\^e" "ê") ; ecir
   1424            ("\"e" "ë") ; eum
   1425            ("`e" "è") ; egrave
   1426            ("\"i" "ï") ; ium
   1427            ("\\^i" "î") ; icir
   1428            ("`i" "ì") ; igrave
   1429            ("\"A" "Ä") ; Aum
   1430            ("\\*A" "Å") ; Aring
   1431            ("'E" "È") ; Eacute
   1432            ("ae" "æ") ; ae
   1433            ("AE" "Æ") ; AE
   1434            ("\\^o" "ô") ; ocir
   1435            ("\"o" "ö") ; oum
   1436            ("`o" "ò") ; ograve
   1437            ("'o" "ó") ; oacute
   1438            ("Oacute" "Ó")
   1439            ("\\^u" "û") ; ucir
   1440            ("`u" "ù") ; ugrave
   1441            ("'u" "ú") ; uacute
   1442            ("\"y" "ÿ") ; yum
   1443            ("\"O" "Ö") ; Oum
   1444            ("\"U" "Ü")
   1445            ("pound" "£")
   1446            ("'a" "á") ; aacute
   1447            ("'i" "í") ; iacute
   1448            ("frac23" "⅔")
   1449            ("frac13" "⅓")
   1450            ("frac12" "½")
   1451            ("frac14" "¼")
   1452            ("\\?" "�")  ; Place-holder for unknown or illegible character.
   1453            ("hand" "☞")  ; pointing hand (printer's u"fist") ; hand ; and
   1454            ("\\.\\.\\." "…")
   1455            ("\\*\\*\\*\\*\\*\\*\\*\\*" "✶")
   1456            ("sect" "§") ; sect
   1457            ("=a" "ā") ; amac
   1458            ("ng" "ṉ")  ; u"n sub-macron" ; nsm
   1459            ("sharp" "♯") ; sharp
   1460            ("flat" "♭") ; flat
   1461            ("th" "th") ; th
   1462            ("=i" "ī") ; imac
   1463            ("imac" "ī") ; imac
   1464            ("=e" "ē") ; emac
   1465            ("\\.d" "ḍ")  ; Sanskrit/Tamil d dot
   1466            ("\\.n" "ṇ")  ; Sanskrit/Tamil n dot ; nsdot
   1467            ("\\.t" "ṭ")  ; Sanskrit/Tamil t dot ; tsdot
   1468            ("a\\^" "ă") ; acr
   1469            ("e\\^" "ĕ") ; ecr
   1470            ("i\\^" "ĭ") ; icr
   1471            ("o\\^" "ŏ") ; ocr
   1472            ("!o" "ǒ")
   1473            ("OE" "Œ") ; OE
   1474            ("oe" "œ") ; oe
   1475            ("=O" "Ō") ; Omac
   1476            ("=o" "ō") ; omac
   1477            ("=u" "ū") ; umac
   1478            ("ocar" "ǒ") ; o hacek
   1479            ("=ae" "ǣ") ; aemac
   1480            ("u\\^" "ŭ") ; ucr
   1481            ("a\\^" "ă")
   1482            ("=y" "ȳ") ; ymac
   1483            ("asl" "a")  ; FIXME: a u"semilong" (has a macron above with a short
   1484            ("-e" "e")  ; FIXME: e u"semilong" ; esl
   1485            ("-i" "i")  ; FIXME: i u"semilong" ; isl
   1486            ("-o" "o")  ; FIXME: o u"semilong" ; osl
   1487            ("-u" "u")  ; FIXME: u u"semilong" ; usl
   1488            ("-n" "ṉ") ; nsmac
   1489            ("\\.a" "ȧ")  ; a with dot above ; adot
   1490            ("\\.c" "ċ")  ; cdot
   1491            ("\\.h" "ḥ")  ; hsdot
   1492            ("\\.m" "ṃ")  ; msdot
   1493            ("\\.u" "ụ")  ; usdot
   1494            ("\\.z" "ẓ")  ; zsdot
   1495            ("Eth" "Ð") ; EDH
   1496            ("eth" "ð") ; edh
   1497            ("thorn" "þ") ; thorn
   1498            ("~a" "ã") ; atil
   1499            ("~e" "ẽ") ; etil
   1500            ("itil" "ĩ")
   1501            ("otil" "õ")
   1502            ("util" "ũ")
   1503            ("~n" "ñ") ; ntil
   1504            ("Atil" "Ã")
   1505            ("Etil" "Ẽ")
   1506            ("Itil" "Ĩ")
   1507            ("Otil" "Õ")
   1508            ("Util" "Ũ")
   1509            ("~N" "Ñ") ; Ntil
   1510            ("\\.n" "ṅ") ; ndot
   1511            ("\\.r" "ṛ") ; rsdot
   1512            ("yogh" "ȝ") ; yogh
   1513            ("deg" "°")
   1514            ("middot" "•")
   1515            ("root" "√")
   1516            ;; Greek letters
   1517            ("alpha" "α")
   1518            ("beta" "β")
   1519            ("gamma" "γ")
   1520            ("delta" "δ")
   1521            ("epsilon" "ε")
   1522            ("zeta" "ζ")
   1523            ("eta" "η")
   1524            ("theta" "θ")
   1525            ("iota" "ι")
   1526            ("approx" "κ") ; ap
   1527            ("lambda" "λ")
   1528            ("mu" "μ")
   1529            ("nu" "ν")
   1530            ("xi" "ξ")
   1531            ("omicron" "ο")
   1532            ("pi" "π")
   1533            ("rho" "ρ")
   1534            ("sigma" "σ")
   1535            ("sigmat" "ς")
   1536            ("tau" "τ")
   1537            ("upsilon" "υ")
   1538            ("phi" "φ")
   1539            ("chi" "χ")
   1540            ("psi" "ψ")
   1541            ("omega" "ω")
   1542            ("digamma" "ϝ")
   1543            ("ALPHA" "Α")
   1544            ("BETA" "Β")
   1545            ("Gamma" "Γ") ; GAMMA
   1546            ("Delta" "Δ") ; DELTA
   1547            ("EPSILON" "Ε")
   1548            ("ZETA" "Ζ")
   1549            ("ETA" "Η")
   1550            ("Theta" "Θ") ; THETA
   1551            ("IOTA" "Ι")
   1552            ("KAPPA" "Κ")
   1553            ("Lambda" "Λ") ; LAMBDA
   1554            ("MU" "Μ")
   1555            ("NU" "Ν")
   1556            ("XI" "Ξ")
   1557            ("Omicron" "Ο") ; OMICRON
   1558            ("Pi" "Π") ; PI
   1559            ("RHO" "Ρ")
   1560            ("Sigma" "Σ") ; SIGMA
   1561            ("Tau" "Τ") ; TAU
   1562            ("Upsilon" "Υ") ; UPSILON
   1563            ("PHI" "Φ")
   1564            ("Chi" "Χ") ; CHI
   1565            ("PSI" "Ψ")
   1566            ("Omega" "Ω") ; OMEGA
   1567            ;; FIXME: Vowels with a double dot below. There`s nothing suitable in the Unicode
   1568            ("add" "a")
   1569            ("udd" "u")
   1570            ("ADD" "A")
   1571            ("UDD" "U")
   1572            ;; Quotes
   1573            ("dagger" "†")
   1574            ("dag" "†")
   1575            ("u\\^" "§") ; par
   1576            ("and" "and")
   1577            ("or" "or")
   1578            ("sec" "˝")
   1579            ("[,C]" "Ç")
   1580            ("\"u" "ü") ; uum
   1581            ("'e" "é") ; eacute
   1582            ("\\^a" "â") ; acir
   1583            ("\"a" "ä") ; aum
   1584            ("`a" "à") ; agrave
   1585            ("\\*a" "å") ; aring
   1586            ("\\*u" "ů") ; uring
   1587            (",c" "ç") ; ccedil
   1588            ("cced" "ç")
   1589            ("\\^e" "ê") ; ecir
   1590            ("\"e" "ë") ; eum
   1591            ("`e" "è") ; egrave
   1592            ("\"i" "ï") ; ium
   1593            ("\\^i" "î") ; icir
   1594            ("`i" "ì") ; igrave
   1595            ("\"A" "Ä") ; Aum
   1596            ("\\*A" "Å") ; Aring
   1597            ("'E" "È") ; Eacute
   1598            ("ae" "æ") ; ae
   1599            ("AE" "Æ") ; AE
   1600            ("\\^o" "ô") ; ocir
   1601            ("\"o" "ö") ; oum
   1602            ("`o" "ò") ; ograve
   1603            ("'o" "ó") ; oacute
   1604            ("Oacute" "Ó")
   1605            ("\\^u" "û") ; ucir
   1606            ("`u" "ù") ; ugrave
   1607            ("'u" "ú") ; uacute
   1608            ("\"y" "ÿ") ; yum
   1609            ("\"O" "Ö") ; Oum
   1610            ("\"U" "Ü")
   1611            ("pound" "£")
   1612            ("'a" "á") ; aacute
   1613            ("'i" "í") ; iacute
   1614            ("frac23" "⅔")
   1615            ("frac13" "⅓")
   1616            ("frac12" "½")
   1617            ("frac14" "¼")
   1618            ;; ("\\?" "�")  ; Place-holder for unknown or illegible character.
   1619            ("hand" "☞")  ; pointing hand (printer's u"fist") ; hand ; and
   1620            ("sect" "§") ; sect
   1621            ("=a" "ā") ; amac
   1622            ("ng" "ṉ")  ; u"n sub-macron" ; nsm
   1623            ("sharp" "♯") ; sharp
   1624            ("flat" "♭") ; flat
   1625            ("th" "th") ; th
   1626            ("=i" "ī") ; imac
   1627            ("=e" "ē") ; emac
   1628            ("\\.d" "ḍ")  ; Sanskrit/Tamil d dot
   1629            ("\\.n" "ṇ")  ; Sanskrit/Tamil n dot ; nsdot
   1630            ("\\.t" "ṭ")  ; Sanskrit/Tamil t dot ; tsdot
   1631            ("a\\^" "ă") ; acr
   1632            ("e\\^" "ĕ") ; ecr
   1633            ("i\\^" "ĭ") ; icr
   1634            ("o\\^" "ŏ") ; ocr
   1635            ("!o" "ǒ")
   1636            ("OE" "Œ") ; OE
   1637            ("oe" "œ") ; oe
   1638            ("=O" "Ō") ; Omac
   1639            ("=o" "ō") ; omac
   1640            ("=u" "ū") ; umac
   1641            ("ocar" "ǒ") ; o hacek
   1642            ("=ae" "ǣ") ; aemac
   1643            ("u\\^" "ŭ") ; ucr
   1644            ("a\\^" "ă")
   1645            ("=y" "ȳ") ; ymac
   1646            ("asl" "a")  ; FIXME: a u"semilong" (has a macron above with a short
   1647            ("-e" "e")  ; FIXME: e u"semilong" ; esl
   1648            ("-i" "i")  ; FIXME: i u"semilong" ; isl
   1649            ("-o" "o")  ; FIXME: o u"semilong" ; osl
   1650            ("-u" "u")  ; FIXME: u u"semilong" ; usl
   1651            ("-n" "ṉ") ; nsmac
   1652            ("\\.a" "ȧ")  ; a with dot above ; adot
   1653            ("\\.c" "ċ")  ; cdot
   1654            ("\\.h" "ḥ")  ; hsdot
   1655            ("\\.m" "ṃ")  ; msdot
   1656            ("\\.u" "ụ")  ; usdot
   1657            ("\\.z" "ẓ")  ; zsdot
   1658            ("Eth" "Ð") ; EDH
   1659            ("eth" "ð") ; edh
   1660            ("thorn" "þ") ; thorn
   1661            ("~a" "ã") ; atil
   1662            ("~e" "ẽ") ; etil
   1663            ("itil" "ĩ")
   1664            ("otil" "õ")
   1665            ("util" "ũ")
   1666            ("~n" "ñ") ; ntil
   1667            ("Atil" "Ã")
   1668            ("Etil" "Ẽ")
   1669            ("Itil" "Ĩ")
   1670            ("Otil" "Õ")
   1671            ("Util" "Ũ")
   1672            ("~N" "Ñ") ; Ntil
   1673            ("\\.n" "ṅ") ; ndot
   1674            ("\\.r" "ṛ") ; rsdot
   1675            ("yogh" "ȝ") ; yogh
   1676            ("deg" "°")
   1677            ("middot" "•")
   1678            ("root" "√")
   1679            ;; Greek letters
   1680            ("alpha" "α")
   1681            ("beta" "β")
   1682            ("gamma" "γ")
   1683            ("delta" "δ")
   1684            ("epsilon" "ε")
   1685            ("zeta" "ζ")
   1686            ("eta" "η")
   1687            ("theta" "θ")
   1688            ("iota" "ι")
   1689            ("approx" "κ") ; ap
   1690            ("lambda" "λ")
   1691            ("mu" "μ")
   1692            ("nu" "ν")
   1693            ("xi" "ξ")
   1694            ("omicron" "ο")
   1695            ("pi" "π")
   1696            ("rho" "ρ")
   1697            ("sigma" "σ")
   1698            ("sigmat" "ς")
   1699            ("tau" "τ")
   1700            ("upsilon" "υ")
   1701            ("phi" "φ")
   1702            ("chi" "χ")
   1703            ("psi" "ψ")
   1704            ("omega" "ω")
   1705            ("digamma" "ϝ")
   1706            ("ALPHA" "Α")
   1707            ("BETA" "Β")
   1708            ("Gamma" "Γ") ; GAMMA
   1709            ("Delta" "Δ") ; DELTA
   1710            ("EPSILON" "Ε")
   1711            ("ZETA" "Ζ")
   1712            ("ETA" "Η")
   1713            ("Theta" "Θ") ; THETA
   1714            ("IOTA" "Ι")
   1715            ("KAPPA" "Κ")
   1716            ("Lambda" "Λ") ; LAMBDA
   1717            ("MU" "Μ")
   1718            ("NU" "Ν")
   1719            ("XI" "Ξ")
   1720            ("Omicron" "Ο") ; OMICRON
   1721            ("Pi" "Π") ; PI
   1722            ("RHO" "Ρ")
   1723            ("Sigma" "Σ") ; SIGMA
   1724            ("Tau" "Τ") ; TAU
   1725            ("Upsilon" "Υ") ; UPSILON
   1726            ("PHI" "Φ")
   1727            ("Chi" "Χ") ; CHI
   1728            ("PSI" "Ψ")
   1729            ("Omega" "Ω") ; OMEGA
   1730            ;; FIXME: Vowels with a double dot below. There`s nothing suitable in the Unicode
   1731            ("add" "a")
   1732            ("udd" "u")
   1733            ("ADD" "A")
   1734            ("UDD" "U")
   1735            ;; Quotes
   1736            ("dagger" "†")
   1737            ("dag" "†")
   1738            ("u\\^" "§") ; par
   1739            ("and" "and")
   1740            ("or" "or")
   1741            ("times" "×")
   1742            ("sec" "˝"))))
   1743     (setq pronunciation
   1744           (replace-regexp-in-string
   1745            "\\.\\.\\." "…"
   1746            (replace-regexp-in-string
   1747             "\\*\\*\\*\\*\\*\\*\\*\\*" "✶"
   1748             pronunciation)))
   1749     (dolist (dcrt diacritics)
   1750       (setq pronunciation (replace-regexp-in-string
   1751                            (concat "\\[" (car dcrt) "\\]")
   1752                            (cadr dcrt)
   1753                            pronunciation t)))
   1754     pronunciation))
   1755 
   1756 (defun lexic-format-reflow-text (text max-width &optional min-width initial-colunm indent sepregex)
   1757   "Add newlines as required to ensure that TEXT never exceeds MAX-WIDTH columns.
   1758 
   1759 Can also set a MIN-WIDTH for new lines of text created by a line break,
   1760 an INITIAL-COLUNM that the text starts at, and an INDENT string to be inserted
   1761 after every line break.
   1762 
   1763 When regex SEPREGEX is provided, it is used to detect word separators.
   1764 It is \"[ ,.]\" by default."
   1765   (let* ((initial-col (or initial-colunm 0))
   1766          (min-width (or min-width 1))
   1767          (indent (or indent ""))
   1768          (sepregex (or sepregex "[ ,.]"))
   1769          (line-regex (format "\\(\\`.\\{%d,%d\\}\\(?:%s\\(?:.\\{1,%d\\}\\'\\)?\\|\\'\\)\\|.\\{%d\\}\\)\\(.*\\)"
   1770                              min-width (- max-width (length indent))
   1771                              sepregex min-width (- max-width (length indent))))
   1772          reflowed-text)
   1773     (setq text (replace-regexp-in-string "[[:space:]]+" " " text))
   1774     (setq text
   1775           (if (> initial-col max-width)
   1776               (replace-regexp-in-string "\\` " "" text)
   1777             (replace-regexp-in-string
   1778              (format "\\`.\\{%d,%d\\}%s\\(?:.\\{1,%d\\}\\'\\)?\\|\\`"
   1779                      (min min-width (- max-width initial-col)) (- max-width initial-col)
   1780                      sepregex min-width)
   1781              (lambda (match)
   1782                (setq reflowed-text match)
   1783                "")
   1784              text)))
   1785     (while (not (string-empty-p text))
   1786       (setq text
   1787             (if (<= (length text) max-width)
   1788                 (progn (setq reflowed-text (concat reflowed-text
   1789                                                    (unless (string-empty-p reflowed-text)
   1790                                                      (concat "\n" indent))
   1791                                                    text)) "")
   1792               (replace-regexp-in-string
   1793                line-regex
   1794                (lambda (match)
   1795                  (setq reflowed-text (concat reflowed-text "\n" indent (match-string 1 match)))
   1796                  (match-string 2 match))
   1797                text))))
   1798     reflowed-text))
   1799 
   1800 (defun lexic-format-online-etym (entry &optional _expected-word)
   1801   "Make an html ENTRY look nice.
   1802 Designed for an export of Douglas Harper's Online Etymology Dictionary,
   1803 collected using https://framagit.org/tuxor1337/dictmaster."
   1804   (thread-last
   1805       (string-join
   1806        (delq nil
   1807              (mapcar
   1808               (lambda (e)
   1809                 (when (string= (plist-get e :dict)
   1810                                (plist-get entry :dict))
   1811                   (plist-get e :info)))
   1812               (lexic-parse-results
   1813                (lexic-oneshot-lookup
   1814                 (replace-regexp-in-string " ?(.*)" " (*)" (plist-get entry :word)) ; lexic accepts a glob
   1815                 t (list "-0" "-u" (plist-get entry :dict)))))))
   1816     (replace-regexp-in-string
   1817      "\\(?:\\`\\|\n\n\\)<b>\\(.+?\\) (\\(.+?\\)\\([0-9]+\\)?)</b> ?"
   1818      (lambda (match)
   1819        (let ((word (match-string 1 match))
   1820              (pos (lexic-format-expand-abbreviations (match-string 2 match)))
   1821              (index (match-string 3 match)))
   1822          (concat "\n\n\u200B\u200B\u200B"
   1823                  (propertize word 'face 'bold)
   1824                  " "
   1825                  (propertize pos 'face '(bold font-lock-keyword-face))
   1826                  (when index
   1827                    (propertize (concat " " index) 'face '(italic font-lock-doc-face)))
   1828                  "\u2008\n\n"))))
   1829     (replace-regexp-in-string
   1830      "<i>\\(.*?\\)</i>"
   1831      (lambda (match) (propertize (match-string 1 match) 'face 'italic)))
   1832     (replace-regexp-in-string
   1833      "<b>\\(.*?\\)</b>"
   1834      (lambda (match) (propertize (match-string 1 match) 'face 'bold)))
   1835     (replace-regexp-in-string
   1836      "<strong>\\(.*?\\)</strong>"
   1837      (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-constant-face)))
   1838     (replace-regexp-in-string
   1839      "<a href=\".*?\">\\(.*?\\)</a>"
   1840      (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-keyword-face)))
   1841     (replace-regexp-in-string
   1842      "<span style=\".*?\">\\(.*?\\)</span>"
   1843      (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-doc-face)))
   1844     (replace-regexp-in-string
   1845      "[0-9]\\{4\\}s?\\|[0-9]+c\\."
   1846      (lambda (match) (propertize match 'face 'font-lock-string-face)))
   1847     (replace-regexp-in-string
   1848      "<span>\\(.*?\\)</span>\\( (.+?)\\)?"
   1849      (lambda (match)
   1850        (let ((linked (match-string 1 match))
   1851              (pos (lexic-format-expand-abbreviations (match-string 2 match))))
   1852          (concat
   1853           (propertize linked 'face 'font-lock-keyword-face)
   1854           (when pos (propertize (replace-regexp-in-string "\\([0-9]+\\))" " \\1)" pos)
   1855                                 'face '(bold diff-context)))))))
   1856     (replace-regexp-in-string
   1857      "<blockquote>\\(.+?\\) ?\\[\\(.+\\)\\]</blockquote>"
   1858      (lambda (match)
   1859        (concat "❝"
   1860                (propertize
   1861                 (lexic-format-reflow-text
   1862                  (match-string 1 match) 80 5 1 " ")
   1863                 'face 'diff-context)
   1864                "❞\n"
   1865                (propertize (concat " ──"
   1866                                    (lexic-format-reflow-text (match-string 2 match)
   1867                                                              75 5 3 "   "))
   1868                            'face '(italic font-lock-type-face)))))
   1869     (replace-regexp-in-string "<br/>\n?<br/>" "\n")
   1870     (replace-regexp-in-string
   1871      "<p>\\(.*?\\)</p>"
   1872      (lambda (match)
   1873        (concat
   1874         (lexic-format-reflow-text (match-string 1 match)
   1875                                   80 5)
   1876         "\n")))
   1877     (replace-regexp-in-string "</?p>" "") ; any straggling pars
   1878     (replace-regexp-in-string
   1879      "^.\\{86,\\}"
   1880      (lambda (match)
   1881        (lexic-format-reflow-text match 80 5)))))
   1882 
   1883 (defun lexic-format-element (entry &optional _expected-word)
   1884   "Make an ENTRY for an element Look nice.
   1885 Based on http://download.huzheng.org/dict.org/stardict-dictd_www.dict.org_elements-2.4.2.tar.bz2."
   1886   (replace-regexp-in-string
   1887    "^\\([a-z]+\\)
   1888 Symbol: \\([A-Za-z]+\\)
   1889 Atomic number: \\([0-9]+\\)
   1890 Atomic weight: \\((?[0-9.]+)?\\)"
   1891    (lambda (match)
   1892      (let ((element (match-string 1 match))
   1893            (symbol (match-string 2 match))
   1894            (number (match-string 3 match))
   1895            (weight (match-string 4 match)))
   1896        (format
   1897         "┌────────────────┐
   1898 │ %-3s %10s │
   1899 │ %s %11s │
   1900 └────────────────┘
   1901 "
   1902         (propertize number 'face 'font-lock-function-name-face)
   1903         (propertize weight 'face 'font-lock-comment-face)
   1904         (propertize symbol 'face '(bold font-lock-keyword-face))
   1905         (propertize element 'face 'font-lock-string-face))))
   1906    (plist-get entry :info)))
   1907 
   1908 (defun lexic-format-soule (entry &optional _expected-word)
   1909   "Format an ENTRY for WORD in Soule's Dictionary of English Synonyms.
   1910 Designed using http://download.huzheng.org/bigdict/stardict-Soule_s_Dictionary_of_English_Synonyms-2.4.2.tar.bz2."
   1911   (thread-last (plist-get entry :info)
   1912        (replace-regexp-in-string ; categories
   1913         "^\\([IVX]+\\. \\)?\\([a-z.;& ]+\\)"
   1914         (lambda (match)
   1915           (concat
   1916            "\u200B\u200B\u200B"
   1917            (when case-fold-search
   1918              (propertize
   1919               'face '(bold font-lock-constant-face)))
   1920            (propertize (lexic-format-expand-abbreviations (match-string 2 match))
   1921                        'face '(bold font-lock-keyword-face))
   1922            "\u2008")))
   1923        (replace-regexp-in-string
   1924         "^\\([0-9]+\\.\\) \n\\([^,.]*,?\\)"
   1925         (lambda (match)
   1926           (concat
   1927            "\u200B\u200B\u200B\u200B"
   1928            (propertize (match-string 1 match)
   1929                        'face '(bold font-lock-string-face))
   1930            " "
   1931            (match-string 2 match)
   1932            "\u2008")))
   1933        (replace-regexp-in-string
   1934         "^\\(.\\{81\\}\\)"
   1935         (lambda (match)
   1936           (lexic-format-reflow-text match 80 1 0 "   " "[, ]")))
   1937        (replace-regexp-in-string
   1938         ","
   1939         (propertize "," 'face 'font-lock-type-face))))
   1940 
   1941 ;;;###autoload
   1942 (defun lexic-dictionary-help ()
   1943   "Show the Lexic help page."
   1944   (interactive)
   1945   (let ((dict-help-buf (get-buffer-create "*lexic-dict-help*")))
   1946     (with-current-buffer dict-help-buf
   1947       (setq buffer-read-only t)
   1948       (with-silent-modifications
   1949         (erase-buffer)
   1950         (insert "#+title: Lexic Dictionary Help
   1951 #+author: TEC
   1952 
   1953 * sdcv
   1954 
   1955 First you want to make sure you have the *stardict* CLI tool =sdcv=.
   1956 It should be available in [[https://repology.org/project/sdcv/versions][many package repositories]] under that name.
   1957 
   1958 Not that by itself =sdcv= does not come with any dictionaries, you'll need to install those yourself.
   1959 
   1960 * Downloading dictionaries
   1961 
   1962 You can find quite a few dictionaries on http://download.huzheng.org/dict.org/
   1963 and http://download.huzheng.org/bigdict/. By default, Lexic will provide nice
   1964 formatting for some of these. Namely:
   1965 
   1966 | Dictionary             | Recognised ~bookname~                            |
   1967 |------------------------+------------------------------------------------|
   1968 | [[http://download.huzheng.org/dict.org/stardict-dictd-web1913-2.4.2.tar.bz2.][Webster 1913]]        | =Webster's Revised Unabridged Dictionary (1913)= |
   1969 | [[http://download.huzheng.org/dict.org/stardict-dictd_www.dict.org_elements-2.4.2.tar.bz2][Elements]]             | =Elements database=                              |
   1970 | Hitchcock's Bible Name | =Hitchcock's Bible Names Dictionary=             |
   1971 | Online Etymology       | =Online Etymology Dictionary=                    |
   1972 | [[http://download.huzheng.org/bigdict/stardict-Soule_s_Dictionary_of_English_Synonyms-2.4.2.tar.bz2][Soule's Synonyms]]     | =Soule's Dictionary of English Synonyms=         |
   1973 
   1974 The stardict form of the /Online Etymology Dictionary/ can be created by running
   1975 [[https://framagit.org/tuxor1337/dictmaster][dictmaster]]. Unfortunately I do not know of any nice hosted result of this.
   1976 
   1977 * Installing dictionaries
   1978 
   1979 By default =sdcv= will look for dictionaries in =$HOME/.stardict/dic=, (do take note
   1980 of the =dic= part of the path) but this can be changed by setting environment
   1981 variable =STARDICT_DATA_DIR=.
   1982 
   1983 I recommend creating a folder for each dictionary in =STARDICT_DATA_DIR=, for
   1984 example:
   1985 
   1986 #+begin_example
   1987 $STARDICT_DATA_DIR
   1988 ├── elements
   1989 │   └── ...
   1990 ├── etymology
   1991 │   └── ...
   1992 ├── hitchcock
   1993 │   └── ...
   1994 ├── synonyms
   1995 │   ├── SoulesSynonyms.dict.dz
   1996 │   ├── SoulesSynonyms.idx
   1997 │   └── SoulesSynonyms.ifo
   1998 └── webster
   1999     └── ...
   2000 #+end_example
   2001 
   2002 A particular dictionary is composed of a few files:
   2003 + =DICT.dict.dz=
   2004 + =DICT.idx=
   2005 + =DICT.ifo=
   2006 
   2007 For our purposes, we only care about the =.ifo= (info) file. It should look
   2008 something like this.
   2009 #+begin_example
   2010 StarDict's dict ifo file
   2011 version=2.4.2
   2012 wordcount=160161
   2013 idxfilesize=3024035
   2014 bookname=Webster's Revised Unabridged Dictionary (1913)
   2015 description=Connoisseur's reference to American English
   2016 date=2007.8.28
   2017 sametypesequence=m
   2018 #+end_example
   2019 
   2020 We are particularly interested in the ~bookname=~ line, as if you have one of the
   2021 [[Downloading dictionaries][recognised dictionaries]] in order to get the nice formatting you need to set the
   2022 ~bookname~ to the recognised value.
   2023 
   2024 * Using Lexic
   2025 
   2026 Once you have =sdcv= and some dictionaries installed, just go ahead and try =M-x
   2027 lexic-search= 🙂."))
   2028       (goto-char (point-min))
   2029       (org-mode)
   2030       (set-transient-map special-mode-map t))
   2031     (switch-to-buffer-other-window dict-help-buf)))
   2032 
   2033 (provide 'lexic)
   2034 ;;; lexic.el ends here