lexic.el (86326B)
1 ;;; lexic.el --- A major mode to find out more about words -*- lexical-binding: t; -*- 2 3 ;; Copyright 2006~2008 pluskid, 4 ;; 2011~2012 gucong 5 ;; 2020~2021 tecosaur 6 7 ;; Author: pluskid <pluskid@gmail.com>, 8 ;; gucong <gucong43216@gmail.com>, 9 ;; TEC <tec@tecosaur.com> 10 ;; 11 ;; Maintainer: TEC <tec@tecosaur.com> 12 ;; Version: 0.0.1 13 ;; Package-Version: 20220501.1432 14 ;; Package-Commit: f9b3de4d9c2dd1ce5022383e1a504b87bf7d1b09 15 ;; Homepage: https://github.com/tecosaur/lexic 16 ;; Package-Requires: ((emacs "26.3")) 17 18 ;;; License: 19 20 ;; This program is free software; you can redistribute it and/or 21 ;; modify it under the terms of the GNU General Public License as 22 ;; published by the Free Software Foundation; either version 2, or (at 23 ;; your option) any later version. 24 ;; 25 ;; This program is distributed in the hope that it will be useful, 26 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 27 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 ;; GNU General Public License for more details. 29 ;; 30 ;; You should have received a copy of the GNU General Public License 31 ;; along with this program; if not, write to the Free Software 32 ;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 33 34 ;;; Commentary: 35 36 ;; This provides a major mode to view the output of dictionary tools, 37 ;; and utilities that perform searches and nicely format the results. 38 ;; 39 ;; Currently tied to sdcv, but this is intended to be changed in the future. 40 41 ;; Put this file into your load-path and the following into your 42 ;; ~/.emacs: 43 ;; (require 'lexic-mode) 44 ;; (global-set-key (kbd "C-c d") 'lexic-search) 45 46 ;;; Changelog: 47 48 ;; 2020/07/28 49 ;; * New variable: `lexic-dictionary-specs', allows for 50 ;; - Dictionary display names 51 ;; - Custom dictionary entry formatters 52 ;; - Dictionary sorting 53 ;; * Update outline function calls to replace depreciated names.' 54 ;; * Tweak lexic-mode 55 ;; - Remove font-locking 56 ;; - Change `outline-regexp' to ZERO WIDTH SPACE 57 ;; - Add `outline-heading-end-regexp', a PUNCTUATION SPACE 58 ;; - Expand the mode map, to bind 59 ;; - Two modes of entry navigation 60 ;; - History navigation 61 ;; - TAB for toggling an entry 62 ;; * Expand popup window 63 ;; * Add linear history navigation 64 ;; * Revise behaviour of `lexic-next-entry' and `lexic-previous-entry' 65 ;; * New function: `lexic-get-outline-path' which gives the structural path 66 ;; to the current position in buffer, e.g. dict → word v. t. → 1. (Chem.) 67 ;; * Remove (now unused) custom face vars, could do with adding some 68 ;; new face vars in the future 69 ;; * Change the default of `lexic-program-path' to be an absolute path 70 ;; * New functions: `lexic-format-result', `lexic-failed-p', 71 ;; and `lexic-format-failure' to handle the upgraded entry processing 72 ;; * New functions: `lexic-format-webster', `lexic-format-online-etym', 73 ;; `lexic-format-element', and `lexic-format-soule' to format 74 ;; the dictionaries recognised by default in `lexic-dictionary-specs'. 75 ;; - with helper functions `lexic-format-webster-diacritics', and 76 ;; `lexic-format-expand-abbreviations' for nicer content. 77 78 ;; 2012/01/02 79 ;; * New variable: `lexic-word-processor' 80 ;; * Breaking change: 81 ;; for `lexic-dictionary-alist', non-list (non-nil) value now means full dictionary list 82 ;; * Rewrite `lexic-search' for both interactive and non-interactive use 83 ;; * `lexic-dictionary-list' is left for customization use only 84 ;; * Better highlighting. 85 ;; 86 ;; 2011/06/30 87 ;; * New feature: parse output for failed lookup 88 ;; * Keymap modification 89 ;; 90 ;; 2008/06/11 91 ;; * lexic-mode v 0.1 init (with background process) 92 93 ;;; Code: 94 95 (require 'outline) 96 (require 'visual-fill-column nil t) 97 (require 'cl-lib) 98 (require 'subr-x) 99 100 (declare-function spell-fu-mode "spell-fu") 101 102 ;;;;################################################################## 103 ;;;; User Options, Variables 104 ;;;;################################################################## 105 106 (defvar lexic-buffer-name "*lexic*" 107 "The name of the buffer of lexic.") 108 (defvar lexic-dictionary-list t 109 "A list of dictionaries to use. 110 Each entry is a string denoting the name of a dictionary, which 111 is then passed to lexic through the '-u' command line option. 112 Any non-list value means using all the dictionaries.") 113 (defvar lexic-dictionary-alist nil 114 "An alist of dictionaries, used to interactively form the dictionary list. 115 It has the form: 116 ((\"full\" . t) 117 (\"group1\" \"dict1\" \"dict2\" ...) 118 (\"group2\" \"dict2\" \"dict3\")) 119 Any cons cell here means using all dictionaries.") 120 121 (defvar lexic-program-path (executable-find "sdcv") 122 "The path of lexic program.") 123 124 (defvar lexic-dictionary-path nil 125 "The path to the dictionaries.") 126 127 (defvar lexic-word-processor nil 128 "This is the function that take a word (stirng) 129 and return a word or a list of words for lookup by `lexic-search'. 130 All lookup result(s) will finally be concatenated together. 131 132 nil value means do nothing with the original word. 133 134 The following is an example. This function takes the original word and 135 compare whether simplified and traditional form of the word are the same. 136 If not, look up both of the words. 137 138 (lambda (word) 139 (let ((sim (chinese-conv word \"simplified\")) 140 (tra (chinese-conv word \"traditional\"))) 141 (if (not (string= sim tra)) 142 (list sim tra) 143 word))) 144 ") 145 146 147 (defvar lexic-current-dictionary-list nil 148 "A list of dictionaries to use in searches. 149 Either entries from `lexic-dictionary-alist', or any non-list value, 150 which will cause all avalible dictionaries to be used.") 151 152 (defvar lexic-wait-timeout 2 153 "The max time (in seconds) to wait for the lexic process to produce some output.") 154 (defvar lexic-wait-interval 0.1 155 "The interval (in seconds) to sleep each time to wait for lexic's output.") 156 157 (defconst lexic-process-name " %lexic-mode-process%") 158 (defconst lexic-process-buffer-name " *lexic-mode-process*") 159 160 (defvar lexic-word-prompts '("Enter word or phrase: ") 161 "A list of prompts that lexic use to prompt for word.") 162 163 (defvar lexic-choice-prompts '("Your choice[-1 to abort]: ") 164 "A list of prompts that lexic use to prompt for a choice of multiple candidates.") 165 166 (defvar lexic-result-patterns '("^Found [0-9]+ items, similar to [*?/|]*\\(.+?\\)[*?]*\\.") 167 "A list of patterns to extract result word of lexic. 168 Special characters are stripped.") 169 170 (defvar lexic--search-history nil) 171 (defvar lexic--search-history-position -1) 172 173 (defvar lexic-expand-abbreviations t 174 "Whether or not to try to expand abbreviations, where they are expected.") 175 176 177 ;;; ================================================================== 178 ;;; Frontend, search word and display lexic buffer 179 180 ;;;###autoload 181 (defun lexic-search (word &optional dict-list-name dict-list interactive-p no-history-p) 182 "Search WORD through the command line tool lexic. 183 The result will be displayed in buffer named with 184 `lexic-buffer-name' with `lexic-mode' if called interactively. 185 186 When provided with DICT-LIST-NAME, query `lexic-dictionary-alist' 187 to get the new dictionary list before search. 188 Alternatively, dictionary list can be specified directly 189 by DICT-LIST. Any non-list value of it means using all dictionaries. 190 191 When called interactively, prompt for the word. 192 Prefix argument have the following meaning: 193 If `lexic-dictionary-alist' is defined, 194 use prefix argument to select a new DICT-LIST-NAME. 195 Otherwise, prefix argument means using all dictionaries. 196 197 When INTERACTIVE-P is non-nil, a buffer displaying the result(s) is shown. 198 Otherwise, the result is returned as a string. 199 200 When NO-HISTORY-P is non-nil, the search is not added to the session history. 201 202 Word may contain some special characters: 203 * match zero or more characters 204 ? match zero or one character 205 / used at the beginning, for fuzzy search 206 | used at the beginning, for data search 207 \ escape the character right after 208 209 TODO decouple the tool from the general method." 210 (interactive 211 (let* ((dict-list-name 212 (and current-prefix-arg lexic-dictionary-alist 213 (completing-read "Select dictionary list: " 214 lexic-dictionary-alist nil t))) 215 (dict-list 216 (and current-prefix-arg (not lexic-dictionary-alist))) 217 (guess (or (and transient-mark-mode mark-active 218 (buffer-substring-no-properties 219 (region-beginning) (region-end))) 220 (current-word nil t) 221 "lexical")) 222 (word (read-string (format "Search dict (default: %s): " guess) 223 nil nil guess))) 224 (list word dict-list-name dict-list t))) 225 ;; init current dictionary list 226 (unless lexic-current-dictionary-list 227 (setq lexic-current-dictionary-list lexic-dictionary-list)) 228 ;; dict-list-name to dict-list 229 (when (and (not dict-list) dict-list-name) 230 (if (not lexic-dictionary-alist) 231 (error "`lexic-dictionary-alist' not defined")) 232 (setq dict-list 233 (cdr (assoc dict-list-name lexic-dictionary-alist)))) 234 ;; prepare new dictionary list 235 (when (and dict-list (not (equal lexic-current-dictionary-list dict-list))) 236 (setq lexic-current-dictionary-list dict-list) 237 ;; kill lexic process 238 (and (get-process lexic-process-name) 239 (kill-process (get-process lexic-process-name))) 240 (while (get-process lexic-process-name) 241 (sleep-for 0.01))) 242 (let ((result 243 (mapconcat 244 (lambda (w) (lexic-do-lookup w)) 245 (if lexic-word-processor 246 (let ((processed (funcall lexic-word-processor word))) 247 (if (listp processed) processed (list processed))) 248 (list word)) 249 ""))) 250 (unless (or no-history-p (string= word 251 (nth lexic--search-history-position 252 lexic--search-history))) 253 (setq lexic--search-history 254 (append (cl-subseq lexic--search-history 255 0 (1+ lexic--search-history-position)) 256 (list word)) 257 lexic--search-history-position (1- (length lexic--search-history)))) 258 (if (not interactive-p) 259 result 260 (with-current-buffer (get-buffer-create lexic-buffer-name) 261 (setq buffer-read-only nil) 262 (erase-buffer) 263 (insert result)) 264 (lexic-goto-lexic) 265 (lexic-mode) 266 (lexic-mode-reinit) 267 (let* ((window (get-buffer-window (lexic-get-buffer))) 268 (height (window-height window)) 269 (min-height (pcase (count-lines (point-min) (point-max)) 270 ((pred (> 50)) 12) 271 ((pred (> 100)) 16) 272 (_ 20)))) 273 (when (> min-height height) 274 (window-resize window (- 12 height))))))) 275 276 ;;;###autoload 277 (defun lexic-search-word-at-point () 278 "Perform `lexic-search' on the word at or near point." 279 (interactive) 280 (lexic-search 281 (downcase 282 (or (and transient-mark-mode mark-active 283 (buffer-substring-no-properties 284 (region-beginning) (region-end))) 285 (current-word nil t) 286 "lexical")) 287 nil nil t)) 288 289 ;;;###autoload 290 (defun lexic-list-dictionary () 291 "Show available dictionaries." 292 (interactive) 293 (let (resize-mini-windows) 294 (shell-command (concat lexic-program-path " -l") lexic-buffer-name))) 295 296 (defun lexic-generate-dictionary-argument () 297 "Generate the appropriate stcv dictionary argument. 298 Using `lexic-current-dictionary-list' and `lexic-dictionary-path'." 299 (append 300 (and lexic-dictionary-path (list "--data-dir" (expand-file-name lexic-dictionary-path))) 301 (and (listp lexic-current-dictionary-list) 302 (mapcan (lambda (dict) 303 (list "-u" dict)) 304 lexic-current-dictionary-list)))) 305 306 (defun lexic-search-history-backwards () 307 "Show the previous word searched." 308 (interactive) 309 (if (> lexic--search-history-position 0) 310 (lexic-search (nth (setq lexic--search-history-position 311 (1- lexic--search-history-position)) 312 lexic--search-history) 313 nil nil t t) 314 (message "At start of search history."))) 315 316 (defun lexic-search-history-forwards () 317 "Show the next word searched." 318 (interactive) 319 (if (> (length lexic--search-history) lexic--search-history-position) 320 (lexic-search (nth (setq lexic--search-history-position 321 (1+ lexic--search-history-position)) 322 lexic--search-history) 323 nil nil t t) 324 (message "At end of search history."))) 325 326 ;;; ================================================================== 327 ;;; utilities to switch from and to lexic buffer 328 (defvar lexic-previous-window-conf nil 329 "Window configuration before switching to lexic buffer.") 330 (defun lexic-goto-lexic () 331 "Switch to lexic buffer in other window." 332 (interactive) 333 (unless (eq (current-buffer) 334 (lexic-get-buffer)) 335 (setq lexic-previous-window-conf (current-window-configuration))) 336 (let* ((buffer (lexic-get-buffer)) 337 (window (get-buffer-window buffer))) 338 (if (null window) 339 (switch-to-buffer-other-window buffer) 340 (select-window window)))) 341 342 (defun lexic-return-from-lexic () 343 "Bury lexic buffer and restore the previous window configuration." 344 (interactive) 345 (kill-process (get-process lexic-process-name)) 346 (if (window-configuration-p lexic-previous-window-conf) 347 (progn 348 (set-window-configuration lexic-previous-window-conf) 349 (setq lexic-previous-window-conf nil) 350 (bury-buffer (lexic-get-buffer))) 351 (bury-buffer))) 352 353 (defun lexic-get-buffer () 354 "Get the lexic buffer. Create one if there's none." 355 (let ((buffer (get-buffer-create lexic-buffer-name))) 356 (with-current-buffer buffer 357 (unless (eq major-mode 'lexic-mode) 358 (lexic-mode))) 359 buffer)) 360 361 ;;; ================================================================== 362 363 (defvar lexic-mode-map 364 (let ((map (copy-keymap special-mode-map))) 365 (define-key map "q" 'lexic-return-from-lexic) 366 (define-key map (kbd "RET") 'lexic-search-word-at-point) 367 (define-key map "a" 'outline-show-all) 368 (define-key map "h" 'outline-hide-body) 369 (define-key map "o" 'lexic-toggle-entry) 370 (define-key map (kbd "TAB") 'lexic-toggle-entry) 371 (define-key map "n" 'lexic-next-entry) 372 (define-key map "N" (lambda () (interactive) (lexic-next-entry t))) 373 (define-key map "p" 'lexic-previous-entry) 374 (define-key map "P" (lambda () (interactive) (lexic-previous-entry t))) 375 (define-key map "b" 'lexic-search-history-backwards) 376 (define-key map "f" 'lexic-search-history-forwards) 377 (set-keymap-parent map special-mode-map) 378 map) 379 "Keymap for `lexic-mode'.") 380 381 382 (define-derived-mode lexic-mode fundamental-mode "lexic" 383 "Major mode to look up word through lexic. 384 \\{lexic-mode-map} 385 Turning on lexic mode runs the normal hook `lexic-mode-hook'. 386 387 This mode locally removes any `spell-fu-mode' or `flyspell-mode' entries in 388 `text-mode-hook', but won't catch any other spell-checking initialisation. 389 Consider resolving any edge cases with an addition to `lexic-mode-hook'." 390 (setq buffer-read-only t) 391 (add-hook 'kill-buffer-hook 392 (lambda () 393 (let ((proc (get-process lexic-process-name))) 394 (when (process-live-p proc) 395 (kill-process proc)))) 396 nil t) 397 (setq-local outline-regexp "\u200B+") 398 (setq-local outline-heading-end-regexp "\u2008") 399 (when (featurep 'visual-fill-column) 400 (setq-local visual-fill-column-center-text t) 401 (visual-fill-column-mode 1))) 402 403 (defun lexic-mode-reinit () 404 "Re-initialize buffer. 405 Hide all entrys but the first one and goto 406 the beginning of the buffer." 407 (ignore-errors 408 (setq buffer-read-only nil) 409 (lexic-parse-failed) 410 (setq buffer-read-only t) 411 412 (let* ((window (get-buffer-window (lexic-get-buffer))) 413 (win-height (window-height window)) 414 (content-height (count-lines (point-min) (point-max)))) 415 (when (> 0.5 (/ (float win-height) content-height)) 416 (outline-hide-sublevels 3))) 417 418 (goto-char (point-min)) 419 (search-forward "\u200B\u200B") 420 (left-char 1))) 421 422 (defun lexic-parse-failed () 423 "Determine if the search failed, and if so parse the failure." 424 (goto-char (point-min)) 425 (let (save-word) 426 (while (re-search-forward "^[0-9]+).*-->\\(.*\\)$" nil t) 427 (let ((cur-word (match-string-no-properties 1))) 428 (unless (string= save-word cur-word) 429 (setq save-word cur-word) 430 (re-search-backward "^\\(.\\)" nil t) 431 (insert (format "\n==>%s\n" save-word))))))) 432 433 (defun lexic-expand-entry () 434 "Show the children of the current entry, or subtree if there are none." 435 (outline-show-children) 436 (when ; no children 437 (<= 0 (- (save-excursion (outline-next-heading) (point)) 438 (save-excursion (outline-end-of-subtree) (point)))) 439 (outline-show-subtree))) 440 441 (defun lexic-next-entry (&optional linear) 442 "Move to the next entry, targeting the same level unless LINEAR is set." 443 (interactive) 444 (when (< 1 (lexic-outline-level)) 445 (outline-hide-subtree)) 446 (if linear 447 (outline-next-heading) 448 (condition-case nil 449 (outline-forward-same-level 1) 450 (error 451 (condition-case nil 452 (progn 453 (outline-up-heading 1 t) 454 (outline-forward-same-level 1)) 455 (error (progn (outline-next-heading) 456 (lexic-expand-entry))))))) 457 (lexic-expand-entry) 458 (recenter-top-bottom 1) 459 (message "%s" (lexic-get-outline-path))) 460 461 (defun lexic-previous-entry (&optional linear) 462 "Move to the previous entry, targeting the same level unless LINEAR is set." 463 (interactive) 464 (outline-hide-subtree) 465 (if (= 2 (line-number-at-pos)) 466 (recenter-top-bottom -1) 467 (if linear 468 (outline-previous-heading) 469 (condition-case nil 470 (outline-backward-same-level 1) 471 (error 472 (condition-case nil 473 (outline-up-heading 1 t) 474 (error (outline-previous-heading)))))) 475 (lexic-expand-entry) 476 (recenter-top-bottom 2)) 477 (message "%s" (lexic-get-outline-path))) 478 479 (defun lexic-toggle-entry () 480 "Toggle the folding of the lexic entry point currently lies in." 481 (interactive) 482 (save-excursion 483 (outline-back-to-heading) 484 (if (not (save-excursion 485 (outline-end-of-heading) 486 (outline-invisible-p (line-end-position)))) 487 (outline-hide-subtree) 488 (outline-show-subtree)))) 489 490 ;;; ================================================================== 491 ;;; Support for lexic process in background 492 (defun lexic-do-lookup (word &optional raw-p) 493 "Send the WORD to the lexic process and return the result. 494 Optional argument RAW-P signals whether the result should be formatted or not." 495 (let ((process (lexic-get-process))) 496 (process-send-string process (concat word "\n")) 497 (with-current-buffer (process-buffer process) 498 (let ((i 0) result done) 499 (while (and (not done) 500 (< i lexic-wait-timeout)) 501 (when (lexic-match-tail lexic-word-prompts) 502 (setq result (buffer-substring-no-properties (point-min) 503 (point-max))) 504 (setq done t)) 505 (when (lexic-match-tail lexic-choice-prompts) 506 (process-send-string process "-1\n")) 507 (unless done 508 (sleep-for lexic-wait-interval) 509 (setq i (+ i lexic-wait-interval)))) 510 (unless (< i lexic-wait-timeout) 511 ;; timeout 512 (kill-process process) 513 (error "ERROR: timeout waiting for lexic")) 514 (erase-buffer) 515 (if raw-p result 516 (lexic-format-result result)))))) 517 518 (defun lexic-oneshot-lookup (word &optional raw-p args) 519 "Use a oneshot stcv process just to look up WORD, with ARGS. 520 Optional argument RAW-P signals whether the result should be formatted or not." 521 (let ((result (with-temp-buffer 522 (apply #'call-process lexic-program-path nil t nil 523 (append '("-n") args (list word))) 524 (buffer-string)))) 525 (if raw-p result 526 (lexic-format-result result)))) 527 528 (defun lexic-get-process () 529 "Get or create the lexic process." 530 (let ((process (get-process lexic-process-name))) 531 (unless process 532 (with-current-buffer (get-buffer-create 533 lexic-process-buffer-name) 534 (erase-buffer) 535 (setq process (apply #'start-process 536 lexic-process-name 537 lexic-process-buffer-name 538 lexic-program-path 539 (lexic-generate-dictionary-argument))) 540 (set-process-query-on-exit-flag process nil) 541 ;; kill the initial prompt 542 (let ((i 0)) 543 (message "starting lexic...") 544 (while (and (not (lexic-match-tail lexic-word-prompts)) 545 (< i lexic-wait-timeout)) 546 (sit-for lexic-wait-interval t) 547 (setq i (+ i lexic-wait-interval))) 548 (unless (< i lexic-wait-timeout) 549 ;; timeout 550 (kill-process process) 551 (error "ERROR: timeout waiting for lexic")) 552 (erase-buffer)) 553 (message ""))) 554 process)) 555 556 (defun lexic-buffer-tail (length) 557 "Get a substring of length LENGTH at the end of current buffer." 558 (let ((beg (- (point-max) length)) 559 (end (point-max))) 560 (if (< beg (point-min)) 561 (setq beg (point-min))) 562 (buffer-substring-no-properties beg end))) 563 564 (defun lexic-match-tail (prompts) 565 "Look for a sdcv prompt from PROMPTS in the tail of the current buffer. 566 Remove it and return t if found. Return nil otherwise." 567 (let ((done nil) 568 (prompt nil)) 569 (while (and (not done) 570 prompts) 571 (setq prompt (car prompts)) 572 (setq prompts (cdr prompts)) 573 (when (string-equal prompt 574 (lexic-buffer-tail (length prompt))) 575 (delete-region (- (point-max) (length prompt)) 576 (point-max)) 577 (setq done t))) 578 done)) 579 580 ;;;;################################################################## 581 ;;;; Output Processing 582 ;;;;################################################################## 583 584 (defun lexic-format-result (result) 585 "For a RESULT from lexic, test for failure and format accordingly. 586 Entries are sorted by their :priority in `lexic-dictionary-specs' then formatted 587 by `lexic-format-result' in successful case, `cases-format-failure' otherwise." 588 (cond 589 ((string-match-p "^Nothing similar to" result) 590 (lexic-consider-no-results)) 591 ((lexic-failed-p result) 592 (lexic-format-failure result)) 593 (t 594 (let* ((entries 595 (sort (lexic-parse-results result) 596 (lambda (a b) 597 (< (or (lexic-dictionary-spec (plist-get a :dict) :priority) 1) 598 (or (lexic-dictionary-spec (plist-get b :dict) :priority) 1))))) 599 (word (save-match-data 600 (string-match "\\`Found.* similar to \\(\\w+\\)\\." result) 601 (downcase (match-string 1 result))))) 602 (concat 603 "\u200B" 604 (propertize (capitalize word) 'face 'outline-1) 605 "\u2008" 606 (apply #'concat 607 (mapcar (lambda (e) 608 (lexic-format-entry 609 e word)) 610 entries))))))) 611 612 (defun lexic-consider-no-results () 613 "No results have been found. What should we tell the user?" 614 (let ((dicts? (not (string-match-p "\\`Dictionary's name +Word count[\n ]+\\'" 615 (shell-command-to-string (concat lexic-program-path " -l")))))) 616 (if dicts? 617 (user-error "Couldn't find anything similar to your search, sorry :(") 618 (user-error "No results found, but you don't seem to have any dictionaries installed! Try %s" 619 (propertize "M-x lexic-dictionary-help" 'face 'font-lock-keyword-face))))) 620 621 (defun lexic-parse-results (result) 622 "Loop through every entry in RESULT and parse each one. 623 Returns a list of plists with keys :word, :dict, and :info." 624 (let (entries latest-match last-match dict word) 625 (with-temp-buffer 626 (insert result) 627 (goto-char (point-min)) 628 (while 629 (setq latest-match (re-search-forward 630 "-->\\([^\n]+\\)\n-->\\(.+\\)\n\n" nil t)) 631 (when last-match 632 (forward-line -3) 633 (setq entries 634 (append entries 635 `((:word ,word 636 :dict ,dict 637 :info ,(buffer-substring last-match (point)))))) 638 (forward-line 3)) 639 (setq last-match latest-match) 640 (setq dict (match-string 1)) 641 (setq word (match-string 2))) 642 (when last-match 643 (setq entries 644 (append entries 645 `((:word ,word 646 :dict ,dict 647 :info ,(buffer-substring last-match (point-max)))))))))) 648 649 (defun lexic-failed-p (results) 650 "Whether the RESULTS match the hardcoded failure pattern." 651 (if (string-match-p "Found [0-9]+ items, similar to [^.]+\\.\n0)" results) t nil)) 652 653 (defun lexic-format-failure (results) 654 "When lexic failed to match the word, format the suggestions in RESULTS." 655 (let (suggestions last-match) 656 (while (setq last-match 657 (string-match "^[0-9]+)\\(.*\\)-->\\([A-Za-z]+\\)" 658 results 659 (when last-match (1+ last-match)))) 660 (let ((dict (match-string 1 results)) 661 (word (match-string 2 results))) 662 (if (assoc dict suggestions) 663 (setcdr (assoc dict suggestions) 664 (list (append (cadr (assoc dict suggestions)) (list word)))) 665 (setq suggestions (append suggestions `((,dict . ((,word))))))))) 666 (concat 667 (propertize 668 (replace-regexp-in-string 669 "items" "entries" 670 (substring results 0 (string-match "\n" results))) 671 'face 'warning) 672 "\n" 673 (mapconcat (lambda (dict-suggestions) 674 (format "\u200B\u200B%s\n\u200B\u200B\u200B%s" 675 (propertize (or 676 (lexic-dictionary-spec (car dict-suggestions) :short) 677 (car dict-suggestions)) 678 'face 'outline-3) 679 (propertize 680 (mapconcat #'identity (cadr dict-suggestions) "\n\u200B\u200B\u200B") 681 'face 'font-lock-keyword-face))) 682 (sort suggestions 683 (lambda (a b) 684 (< (or (lexic-dictionary-spec (car a) :priority) 1) 685 (or (lexic-dictionary-spec (car b) :priority) 1)))) 686 "\n")))) 687 688 (defun lexic-format-entry (entry &optional expected-word) 689 "Format a given ENTRY, a plist with :word :dict and :info. 690 If the DICT has a :short value in `lexic-dictionary-specs' that is used as 691 the display name. Likewise if present, :formatter is used to generate the 692 entry. EXPECTED-WORD is the word expected in ENTRY." 693 (let ((dict (plist-get entry :dict))) 694 (concat 695 "\n\u200B\u200B" 696 (propertize (or (lexic-dictionary-spec dict :short) 697 dict) 'face 'outline-3) 698 "\n\u2008\n" 699 (if-let* ((formatter (lexic-dictionary-spec dict :formatter))) 700 (let ((case-fold-search nil)) 701 (string-trim (funcall formatter entry expected-word))) 702 (plist-get entry :info)) 703 "\n"))) 704 705 (defun lexic-get-outline-path () 706 "Return a string giving the structural path to the current position." 707 (let ((outline-path "") 708 (last-pos 0) 709 outline-level-current substring level-regexp) 710 (save-excursion 711 (outline-back-to-heading) 712 (setq outline-level-current (lexic-outline-level)) 713 (while (/= (point) last-pos) 714 (setq outline-level-current (lexic-outline-level)) 715 (setq substring 716 (buffer-substring 717 (point) 718 (save-excursion (search-forward "\u2008") (point)))) 719 (setq level-regexp 720 (pcase outline-level-current 721 (1 "^\\([^\n]+\\)") 722 (2 "^\\([^ \n]+\\)") 723 (3 "^\u200B\u200B*\\([^,]+\\(?:, [ &.;a-z]+\\)?\\)") 724 (4 "\\([0-9]+\\.\\( ?([^)]+)\\)?\\( \\w+\\)\\{0,4\\}\\)") 725 (5 "\\(([a-z])\\( ?([^)]+)\\)?\\( \\w+\\)\\{0,4\\}\\)") 726 (_ "^\u200B\u200B*\\([^ ]+\\)"))) 727 (setq outline-path 728 (concat 729 (propertize " → " 'face 'bold) 730 (save-match-data 731 (string-match level-regexp substring) 732 (match-string 1 substring)) 733 outline-path)) 734 (setq last-pos (point)) 735 (ignore-errors 736 (outline-up-heading 1))) 737 (substring outline-path 2)))) 738 739 (defun lexic-outline-level () 740 "It seems that while (outline-level) should work, it has issues." 741 (- (save-excursion (outline-back-to-heading) 742 (search-forward-regexp "\u200B+")) 743 (point))) 744 745 (defvar lexic-dictionary-specs 746 '(("Webster's Revised Unabridged Dictionary (1913)" 747 :formatter lexic-format-webster 748 :priority 1) 749 ("Elements database" 750 :short "Element" 751 :formatter lexic-format-element 752 :priority 2) 753 ("Hitchcock's Bible Names Dictionary" 754 :short "Hitcchcock's Bible Names" 755 :priority 3) 756 ("Online Etymology Dictionary" 757 :short "Etymology" 758 :formatter lexic-format-online-etym 759 :priority 4) 760 ("Soule's Dictionary of English Synonyms" 761 :short "Synonyms" 762 :formatter lexic-format-soule 763 :priority 5)) 764 "List of dictionary specifications. 765 In each entry the car is the name according to lexic, and the cdr is 766 a plist whith the following options: 767 :short - a (usually) shorter display name for the dictionary 768 :formatter - a function with signature (ENTRY WORD) that returns a string 769 :priority - sort priority, defaults to 1") 770 771 (defun lexic-dictionary-spec (dict spec) 772 "Helper function to get a :SPEC of a given DICT." 773 (plist-get (cdr (assoc dict lexic-dictionary-specs)) spec)) 774 775 (defun lexic-format-webster (entry &optional _expected-word) 776 "Make a Webster's dictionary ENTRY for WORD look nice. 777 Designed for Webster's Revised Unabridged Dictionary (1913),as found at 778 http://download.huzheng.org/dict.org/stardict-dictd-web1913-2.4.2.tar.bz2. 779 780 This should also work nicely with GCIDE." 781 (thread-last (plist-get entry :info) 782 (lexic-format-webster-diacritics) 783 (replace-regexp-in-string ; entry dividors 784 (format "\n\n\\(%s\\)" (plist-get entry :word)) 785 "\n ━━━━━━━━━ ■ ━━━━━━━━━\n\n\\1") 786 (replace-regexp-in-string ; entry headline 787 (rx line-start 788 (group-n 1 ; word 789 (any "A-Z") 790 (+ (any "a-z"))) 791 (optional " \\" ; word2 792 (group-n 2 (+ (not (any "\\")))) 793 "\\") 794 (optional " (" ; pronounciation 795 (group-n 3 (+ (not (any ")")))) 796 ")") 797 ", " 798 (group-n 4 ; part of speech 799 (+ (any "A-Z" "a-z" ".;&" " "))) 800 (optional "[" ; etymology / alternative forms 801 (group-n 5 802 (+ (or (+ (not (any "]["))) 803 (and "[" (+ (not (any "]["))) "]")))) 804 "]") 805 (optional ; definitely etymology 806 (+ (any "\n" " ")) "[" 807 (group-n 6 808 (+ (or (+ (not (any "]["))) 809 (and "[" (+ (not (any "]["))) "]")))) 810 "]") 811 (optional " (" ; category 812 (group-n 7 (+ (not (any ")")))) 813 ")")) 814 (lambda (match) 815 (let* ((word2 (match-string 2 match)) 816 (pronounciation (match-string 3 match)) 817 (part-of-speech (lexic-format-expand-abbreviations 818 (replace-regexp-in-string " \\'" "" 819 (match-string 4 match)))) 820 (alternative-forms (when (match-string 6 match) 821 (lexic-format-expand-abbreviations (match-string 5 match)))) 822 (etymology (lexic-format-expand-abbreviations (match-string (if alternative-forms 6 5) match))) 823 (category (lexic-format-expand-abbreviations (match-string 7 match))) 824 (last-newline (lambda (text) (- (length text) 825 (or (save-match-data 826 (string-match "\n[^\n]*\\'" text)) 0))))) 827 (concat 828 "\u200B\u200B\u200B" 829 (propertize word2 830 'face 'bold) 831 (when pronounciation 832 (propertize (format " %s" pronounciation) 833 'face 'font-lock-type-face)) 834 ", " 835 (propertize part-of-speech 836 'face '(bold font-lock-keyword-face)) 837 (when alternative-forms 838 (setq alternative-forms 839 (lexic-format-reflow-text 840 (format " [%s]" alternative-forms) 841 80 10 842 (+ 3 (if pronounciation 1 0) 843 (funcall last-newline 844 (concat word2 pronounciation part-of-speech))) 845 " ")) 846 (propertize alternative-forms 847 'face 'diff-context)) 848 (when etymology 849 (setq etymology 850 (lexic-format-reflow-text 851 (format " [%s]" etymology) 852 80 10 853 (+ 3 (if pronounciation 1 0) 854 (funcall last-newline 855 (concat word2 pronounciation part-of-speech alternative-forms))) 856 " ")) 857 (propertize etymology 858 'face 'font-lock-comment-face)) 859 (when category 860 (propertize (format " (%s)" category) 861 'face 'font-lock-constant-face)) 862 "\u2008")))) 863 (replace-regexp-in-string ; categorised terms 864 "{\\([^}]+?\\)}\\(.?\\) (\\([^)]+?\\))" 865 (lambda (match) 866 (let ((term (match-string 1 match)) 867 (punct (match-string 2 match)) 868 (category (match-string 3 match))) 869 (concat 870 (propertize term 'face 'font-lock-keyword-face) 871 punct 872 (propertize (format " (%s)" 873 (if lexic-expand-abbreviations 874 (lexic-format-expand-abbreviations category) 875 category)) 876 'face 'font-lock-constant-face))))) 877 (replace-regexp-in-string ; other terms 878 "{\\([^}]+?\\)}" 879 (lambda (match) 880 (let ((term (match-string 1 match))) 881 (concat 882 (propertize term 'face 'font-lock-keyword-face))))) 883 (replace-regexp-in-string ; quotations 884 "^\n +\\(\\w[[:ascii:]]+?\\)\\(\n? *--[A-Za-z0-9. ]+\n? *[A-Za-z0-9. ]*\\)" 885 (lambda (match) 886 (let ((body (match-string 1 match)) 887 (author (match-string 2 match))) 888 (concat 889 "\n " 890 (propertize (format "❝%s❞" body) 891 'face 'font-lock-doc-face) 892 author "\n")))) 893 (replace-regexp-in-string ; attributions 894 " --\\([A-Z][A-Za-z. ]+\n? *[A-Za-z0-9. ]*\\)" 895 (lambda (match) 896 (propertize (concat " ──" (match-string 1 match)) 897 'face '(italic font-lock-type-face)))) 898 (replace-regexp-in-string ; inline quotations (1) 899 "``" "“") 900 (replace-regexp-in-string ; inline quotations (1) 901 "''" "”") 902 (replace-regexp-in-string ; em dash approximation 903 " -- " " ─── ") 904 (replace-regexp-in-string ; lists 905 " \\(?:\\([0-9]+\\.\\)\\|\\( ([a-z])\\)\\) \\(?: ?(\\([^)]+\\)) \\)?\\(.*\\)" 906 (lambda (match) 907 (let ((number (match-string 1 match)) 908 (letter (match-string 2 match)) 909 (category (match-string 3 match)) 910 (rest-of-line (match-string 4 match))) 911 (concat 912 (when letter "\u200B") 913 "\u200B\u200B\u200B\u200B " 914 (when number 915 (propertize number 'face '(bold font-lock-string-face))) 916 (when letter 917 (propertize letter 'face 'font-lock-string-face)) 918 (when category 919 (propertize (format " (%s)" 920 (if lexic-expand-abbreviations 921 (lexic-format-expand-abbreviations category) 922 category)) 923 'face 'font-lock-constant-face)) 924 " " 925 rest-of-line 926 "\u2008")))) 927 (replace-regexp-in-string ; note 928 " Note: " 929 (concat " " 930 (propertize " " 'display '(space . (:width 0.55))) 931 (propertize "☞" 'face 'font-lock-function-name-face) 932 " ")) 933 (replace-regexp-in-string ; subheadings 934 " \\(\\w+\\): " 935 (lambda (match) 936 (propertize (concat " "(match-string 1 match) ": ") 937 'face 'bold))))) 938 939 (defun lexic-format-expand-abbreviations (content &optional force) 940 "Expand certain standard abbreviations in CONTENT when `lexic-expand-abbreviations' or FORCE are non-nil." 941 (when content 942 (when (or lexic-expand-abbreviations force) 943 (let ((abbreviations 944 '(; A 945 ("adj" "adjective") 946 ("a" "adjective") 947 ("abbrev" "abbreviated") 948 ("abl" "ablative") 949 ("Abp" "Archbishop") 950 ("acc" "Acoustics") 951 ("act" "active") 952 ("adv" "adverb") 953 ("Agric" "Agriculture") 954 ("Alban" "Albanian") 955 ("Alg" "Algebra") 956 ("Am" "America") 957 ("Amer" "American") 958 ("Am" "Amos") 959 ("Am\\. Cyc" "Appleton's American Cyclopedia") 960 ("Anal. Geom" "Analytical Geometry") 961 ("Anat" "Anatomy") 962 ("Anc" "Ancient") 963 ("Angl\\. Ch" "Anglican Church") 964 ("aor" "aorist") 965 ("Ar" "Arabic") 966 ("Arch" "Architecture") 967 ("Arch\\. Pub\\. Soc" "Architectural Pub. Society") 968 ("Arith" "Arithmetic") 969 ("Arm\\., Armor" "Armorican") 970 ("AS" "Anglo-Saxon") 971 ("Astrol" "Astrology") 972 ("Astron" "Astronomy") 973 ("aug" "augmentative") 974 ;; B 975 ("Bank" "Banking") 976 ("Beau\\. & Fl" "Beaumont & Fletcher") 977 ("B\\. & Fl" "Beaumont & Fletcher") 978 ("Bib\\. Sacra" "Bibliotheca Sacra") 979 ("Bib" "Biblical") 980 ("Bibliog" "Bibliography") 981 ("Biol" "Biology") 982 ("Bisc" "Biscayan") 983 ("B\\. Jon" "Ben Jonson") 984 ("Bk\\. of Com\\. Prayer " "Book of Common Prayer") 985 ("Blackw\\. Mag" "Blackwood's Magazine") 986 ("Bohem" "Bohemian") 987 ("Bot" "Botany") 988 ("Bp" "Bishop") 989 ("Brande & C" "Brande & Cox") 990 ("Braz" "Brazilian") 991 ("Brit\\. Critic" "British Critic") 992 ("Brit\\. Quar\\. Rev" "British Quarterly Review") 993 ("Burl" "Burlesque") 994 ;; C 995 ("C" "Centigrade") 996 ("Cant" "Canticles") 997 ("Carp" "Carpentry") 998 ("Catal" "Catalan") 999 ("Cath\\. Dict" "Catholic Dictionary") 1000 ("Celt" "Celtic") 1001 ("cf" "confer") 1002 ("Cf" "Confer") 1003 ("Ch" "Church") 1004 ("Chald" "Chaldee") 1005 ("Chem" "Chemistry") 1006 ("Ch\\. Hist" "Church History") 1007 ("Chron" "Chronology, Chronicles") 1008 ("Civ" "Civil") 1009 ("Class" "Classical") 1010 ("Class\\. Myth" "Classical Mythology") 1011 ("Col" "Colossians") 1012 ("colloq\\., coll" "colloquial, colloquially") 1013 ("Com" "Commerce, Common") 1014 ("comp" "compound, compounded, composition") 1015 ("compar" "comparative") 1016 ("conj" "conjunction") 1017 ("Con\\. Sect" "Conic Sections") 1018 ("contr" "contracted, contraction") 1019 ("Copt" "Coptic") 1020 ("Corn" "Cornish") 1021 ("corrupt" "corrupted, corruption") 1022 ("Cotgr" "Cotgrave") 1023 ("Cyc\\. Med" "Cyclopedia of Practical Medicine") 1024 ("Crim\\. Law" "Criminal Law") 1025 ("Crystallog" "Crystallography") 1026 ("Cyc" "Cyclopedia") 1027 ;; D 1028 ("D" "Dutch (sometimes Daniel)") 1029 ("Dan" "Danish") 1030 ("dat" "dative") 1031 ("def" "definitions") 1032 ("Deut" "Deuteronomy") 1033 ("Dial" "Dialectic") 1034 ("dim" "diminutive") 1035 ("Diosc" "dioscorides") 1036 ("Disp" "Dispensatory") 1037 ("Disus" "Disused") 1038 ("Dom\\. Econ" "Domestic Economy") 1039 ("Dublin Univ\\. Mag" "Dublin University Magazine") 1040 ("Dyn" "Dynamics") 1041 ;; E 1042 ("E" "English") 1043 ("Eccl" "Ecclesiastical, Ecclesiastes") 1044 ("Eccl\\. Hist" "Ecclesiastical History") 1045 ("Ecclus" "Ecclesiasticus") 1046 ("Eclec\\. Rev" "Eclectic Review") 1047 ("Ed\\. Rev" "Edinburgh Review") 1048 ;; ("e\\. g" "exempli gratia (for example)") 1049 ("Egypt" "Egyptian") 1050 ("Elect" "Electricity") 1051 ("Elec" "Electrical") 1052 ("emph" "emphatic") 1053 ("Encyc\\. Amer" "Encyclopedia Americana") 1054 ("Encyc\\. Crit" "Encyclopedia Britannica") 1055 ("Encyc\\. Dict" "Hunter's Encyclopedic Dictionary") 1056 ("Encyc" "Encyclopedia") 1057 ("Eng\\. Cyc" "English Cyclopedia") 1058 ("Eng" "English") 1059 ("Engin" "Engineering") 1060 ("Eol" "Eolic") 1061 ("Eph\\., Ephes" "Ephesians") 1062 ("equiv" "equivalent") 1063 ("Esd" "Esdras") 1064 ("esp" "especially") 1065 ("Etch\\. & Eng" "Etching & Engraving") 1066 ("Ethnol" "Ethnology") 1067 ("etym\\., etymol" "etymology") 1068 ("Ex\\., Exod" "Exodus") 1069 ("Ezek" "Ezekiel") 1070 ;; F 1071 ("F" "French") 1072 ("f" "feminine") 1073 ("fem" "feminine") 1074 ("Fahr" "Fahrenheit") 1075 ("Far" "Farriery") 1076 ("Feud" "Feudal") 1077 ("Fig" "Figurative, figuratively") 1078 ("Fin" "Finnish") 1079 ("For\\. Quart\\. Rev" "Foreign Quarterly Review") 1080 ("Fort" "Fortification") 1081 ("Fr" "French") 1082 ("fr" "from") 1083 ("freq" "frequentative") 1084 ("Fries" "Friesic") 1085 ("fut" "future") 1086 ;; G 1087 ("G" "German") 1088 ("Gael" "Gaelic") 1089 ("Gal" "Galen") 1090 ("Gal" "Galatians") 1091 ("Galv" "Galvanism") 1092 ("gen" "generally, genitive") 1093 ("Geneal" "Genealogy") 1094 ("Gent\\. Mag" "Gentleman's Magazine") 1095 ("Geog" "Geography") 1096 ("Geol" "Geology") 1097 ("Geom" "Geometry") 1098 ("Ger" "Germanic or German") 1099 ("Gk" "Greek") 1100 ("Goth" "Gothic") 1101 ("Gov\\. of Tongue" "Government of the Tongue") 1102 ("Gr" "Greek") 1103 ("Gram" "Grammar") 1104 ("Gris" "Grisons") 1105 ("Gun" "Gunnery") 1106 ;; H 1107 ("H" "High") 1108 ("Hab" "Habakkuk") 1109 ("Hag" "Haggai") 1110 ("Ham\\. Nav\\. Encyc" "Hamersly's Naval Encyclopedia") 1111 ("Heb" "Hebrew") 1112 ("Her" "Heraldry") 1113 ("Hind" "Hindostanee") 1114 ("Hipp" "Hippocrates") 1115 ("Hist" "History") 1116 ("Horol" "Horology") 1117 ("Hort" "Horticulture") 1118 ("Hung" "Hungarian") 1119 ("Hydraul" "Hydraulics") 1120 ("Hydros" "Hydrostatics") 1121 ("hypoth" "hypothetical") 1122 ;; I 1123 ("Icel" "Icelandic") 1124 ;; ("i\\. e" "id est (that is)") 1125 ("Illust" "Illustration, Illustrated") 1126 ("imp" "imperfect") 1127 ("Imp\\. Dict" "Imperial Dictionary") 1128 ("incho" "inchoative") 1129 ("ind" "indicative") 1130 ("indef" "indefinite") 1131 ("inf" "infinitive") 1132 ("intens" "intensive") 1133 ("interj" "interjection") 1134 ("Internat\\. Cyc" "International Cyclopeia") 1135 ("Ion" "Ionic") 1136 ("i\\. q" "idem quod") 1137 ("Ir" "Irish") 1138 ("Is" "Isaiah") 1139 ("Isa" "Isaiah") 1140 ("It" "Italian") 1141 ;; J 1142 ("Jap" "Japanese") 1143 ("Jas" "James") 1144 ("Jav" "Javanese") 1145 ("Jer" "Jeremiah") 1146 ("Join" "Joinery") 1147 ("Josh" "Joshua") 1148 ("Judg" "Judges") 1149 ;; K 1150 ("K" "Kings") 1151 ;; L 1152 ("L" "Latin") 1153 ("Lam" "Lamentations") 1154 ("Lapp" "Lappish") 1155 ("Lat" "Latin") 1156 ("LD" "Low Dutch") 1157 ("Lett" "Lettish") 1158 ("Lev" "Leviticus") 1159 ("LG" "Low German") 1160 ("LGr" "Low Greek") 1161 ("Linn" "Linnæus") 1162 ("Lit" "Literally") 1163 ("lit" "literally") 1164 ("Lit" "Literature") 1165 ("Lith" "Lithuanian") 1166 ("LL" "Late Latin") 1167 ;; M 1168 ("M" "Middle") 1169 ("m" "masculine") 1170 ("masc" "masculine") 1171 ("Maced" "Macedonian") 1172 ("Mach" "Machinery") 1173 ("Mad" "Madam") 1174 ("Mag" "Magazine") 1175 ("Mal" "Malachi") 1176 ("Malay" "Malayan") 1177 ("Man" "Manège") 1178 ("Manuf" "Manufacturing") 1179 ("Mar" "Maritime") 1180 ("Math" "Mathematics") 1181 ("Matt" "Matthew") 1182 ("ME" "Middle English") 1183 ("Mech" "Mechanic") 1184 ("Med" "Medicine") 1185 ("Metal" "Metallurgy") 1186 ("Metaph" "Metaphysics") 1187 ("Meteor" "Meteorolgy") 1188 ("mgr" "milligrams") 1189 ("MHG" "Middle High German") 1190 ("Micros" "Microscopy") 1191 ("Mil" "Military") 1192 ("Min" "Mineralogy") 1193 ("Mir\\. for Mag" "Mirror for Magistrates") 1194 ("MLG" "Middle Low German") 1195 ("Moham" "Mohammedan") 1196 ("Mozley & W" "Mozley & Whiteley") 1197 ("Mus" "Music") 1198 ("Myst" "Mysteries") 1199 ("Myth" "Mythology") 1200 ;; N 1201 ("Nat\\. Hist" "Natural History") 1202 ("Nat\\. ord" "Natural order") 1203 ("Naut" "Nautical") 1204 ("Nav" "Navy") 1205 ("Navig" "Navigation") 1206 ("N\\. Brit\\. Rev" "North British Review") 1207 ("Neh" "Nehemiah") 1208 ("neut" "neuter") 1209 ("New Am\\. Cyc" "New American Cyclopedia") 1210 ("New Month\\. Mag" "New Monthly Magazine") 1211 ("NF" "New French") 1212 ("NGr" "Mew Greek") 1213 ("NHeb" "New Hebrew") 1214 ("NL" "New Latin") 1215 ("nom" "nominative") 1216 ("Norm\\. F" "Norman French") 1217 ("North Am\\. Rev" "North American Review") 1218 ("Norw" "Norwegian") 1219 ("Num" "Numbers") 1220 ("Numis" "Numismatics") 1221 ("N" "New") 1222 ;; O 1223 ("O" "Old") 1224 ("Ob" "Obadiah") 1225 ("obs" "obsolete") 1226 ("Obsoles" "Obsolescent") 1227 ("OCelt" "Old Celtic") 1228 ("OD" "Old Dutch") 1229 ("ODan" "Old Danish") 1230 ("OE" "Old English") 1231 ("OF" "Old French") 1232 ("OFelm" "Old Flemish") 1233 ("OFris" "Old Frisian") 1234 ("OFries" "Old Friesic") 1235 ("OGael" "Old Gaelic") 1236 ("OGr" "Old Greek") 1237 ("OHG" "Old High German") 1238 ("OIcel" "Old Icelandic") 1239 ("OIt" "Old Italian") 1240 ("OL" "Old Latin") 1241 ("OIr" "Old Irish") 1242 ("ON" "Old Norse") 1243 ("OLG" "Old Low German") 1244 ("OPer" "Old Persian") 1245 ("OPg" "Old Portuguese") 1246 ("OPol" "Old Polish") 1247 ("Opt" "Optics") 1248 ("orig" "original") 1249 ("Ornith" "Ornithology") 1250 ("OS" "Old Saxon") 1251 ("OSlav" "Old Slavic") 1252 ("OSp" "Old Spanish") 1253 ("Oxf\\. Gloss" "Oxford Glossary of Architecture") 1254 ;; P 1255 ("p\\.[\n ]*a" "participial adjective") 1256 ("Paint" "Painting") 1257 ("Paleon" "Paleontology") 1258 ("pass" "passive") 1259 ("Pathol" "Pathology") 1260 ("P\\. Cyc" "Penny Cyclopedia") 1261 ("Per" "Persian") 1262 ("perh" "perhaps") 1263 ("pers" "person") 1264 ("Persp" "Perspective") 1265 ("Pert" "Pertaining") 1266 ("Peruv" "Peruvian") 1267 ("Pet" "Peter") 1268 ("Pg" "Portuguese") 1269 ("Pharm" "Pharmacy, Pharmacopœia") 1270 ("Phil" "Phillipians") 1271 ("Philem" "Philemon") 1272 ("Philol" "Philology") 1273 ("Philos" "Philosophy") 1274 ("Phon" "Phonetics") 1275 ("Photog" "Photography") 1276 ("Photom" "Photometry") 1277 ("Phren" "Phrenology") 1278 ("Phys" "Physics") 1279 ("Phys\\. Sci" "Physical Science") 1280 ("Physiol" "Physiology") 1281 ("pl" "plural") 1282 ("Poet" "Poetry, Poetical") 1283 ("Pol" "Polish") 1284 ("Pol\\. Econ" "Political Economy") 1285 ("Polit\\. Econ" "Political Economy") 1286 ("Pop\\. Sci\\. Monthly" "Polular Science Monthly") 1287 ("Poss" "Possessive") 1288 ("pp" "pages") 1289 ("P\\. Plowman" "Piers Plowman") 1290 ("p\\.[\n ]*p" "past participle") 1291 ("p\\.[\n ]*pr" "present participle") 1292 ("p\\.[\n ]*ple" "present participle") 1293 ("Pr" "Provençal") 1294 ("Pref" "Preface") 1295 ("pref" "prefix") 1296 ("prep" "preposition") 1297 ("pres" "present") 1298 ("pret" "preterit") 1299 ("prin" "principally") 1300 ("Print" "Printing") 1301 ("priv" "privative") 1302 ("prob" "probably") 1303 ("pron" "pronoun") 1304 ("prop" "properly") 1305 ("Pros" "Prosody") 1306 ("prov" "provincial") 1307 ("Prov" "Provincial") 1308 ("Prov" "Proverbs") 1309 ("Ps\\., Psa" "Psalms") 1310 ("Pyro\\.-elect" "Pyro-electricity") 1311 ("p" "participle") 1312 ;; Q 1313 ("Quart\\. Rev" "Quarterly Review") 1314 ("q\\. v" "quod vide (which see)") 1315 ;; R 1316 ("R\\. C" "Roman Catholic") 1317 ("R\\. C\\. Ch" "Roman Catholic Church") 1318 ("Rep\\. Sec\\. of War" "Report of Secretary of War") 1319 ("Rev" "Revelation") 1320 ("Rev" "Review") 1321 ("Rev\\. Ver" "Revised Version (of the Bible)") 1322 ("Rhet" "Rhetoric") 1323 ("R\\. of Brunne" "Robert of Brunne") 1324 ("R\\. of Gl" "Robert of Gloucester") 1325 ("Rom" "Roman, Romans") 1326 ("Rom\\. Cath" "Roman Catholic") 1327 ("Rom\\. of R" "Romaunt of the Rose") 1328 ("Rpts" "reports") 1329 ("Russ" "Russian") 1330 ("R" "Rare") 1331 ;; S 1332 ("Sam" "Samaritan") 1333 ("Sam" "Samuel") 1334 ("Sat\\. Rev" "Saturday Review") 1335 ("Sax" "Saxon") 1336 ("sc" "scilicet (being understood)") 1337 ("Scand" "Scandinavian") 1338 ("Sci" "Science") 1339 ("Sci\\. Am" "Scientific American") 1340 ("Scot" "Scotland, Scottish") 1341 ("Script" "Scripture, Scriptural") 1342 ("Sculp" "Sculpture") 1343 ("Serb" "Serbian") 1344 ("Serv" "Servian") 1345 ("Shak" "Shakespeare") 1346 ("sing" "singular") 1347 ("Skr" "Sanskrit") 1348 ("Slav" "Slavonic") 1349 ("Sp" "Spanish") 1350 ("Specif" "Specifically") 1351 ("Stat" "Statuary") 1352 ("subj" "subjunctive") 1353 ("superl" "superlative") 1354 ("Surg" "Surgery") 1355 ("Surv" "Surveying") 1356 ("Sw" "Swedish") 1357 ("Syd\\. Soc\\. Lex" "Sydenham Society Lexicon") 1358 ("Syn" "Synonyms") 1359 ("Synop" "Synopsis") 1360 ("Syr" "Syriac") 1361 ;; T 1362 ("Tart" "Tartaric") 1363 ("Teleg" "Telegraphy") 1364 ("term" "termination") 1365 ("Test" "Testament") 1366 ("Theol" "Theology") 1367 ("Thes" "Thessalonians") 1368 ("Tim" "Timothy") 1369 ("Todd & B" "Todd & Bowman") 1370 ("Trans" "Translation") 1371 ("Treas" "Treasury") 1372 ("Trig" "Trigonometry") 1373 ("Turk" "Turkish") 1374 ("Typog" "Typography") 1375 ;; U 1376 ("Univ" "University") 1377 ("Up" "Upper") 1378 ("U\\. ?S" "United States") 1379 ("U\\. ?S\\. Disp" "United States Dispensatory") 1380 ("U\\. ?S\\. Pharm" "United States Pharmacopœia") 1381 ("U\\. ?S\\. Int\\. Rev\\. Statutes" "United States Internal Revenue Statutes") 1382 ("usu" "usually") 1383 ;; V 1384 ("v\\.[\n ]*i" "intransitive verb") 1385 ("v\\.[\n ]*t" "transitive verb") 1386 ("var" "variety") 1387 ("vb\\.[\n ]*n" "verbal noun") 1388 ("Veter" "Veterinary") 1389 ("Vitr" "Vitruvius") 1390 ;; W 1391 ("W" "Welsh") 1392 ("Wall" "Wallachian") 1393 ("Westm\\. Cat" "Westminster Catechism") 1394 ("Westm\\. Rev" "Westminster Review") 1395 ;; Z 1396 ("Zech" "Zechariah") 1397 ("Zeph" "Zephaniah") 1398 ("Zoöl" "Zoölogy") 1399 ;; Reordered for correctness 1400 ("n" "noun") 1401 ("v" "verb")))) 1402 (dolist (abbrev abbreviations) 1403 (setq content 1404 (replace-regexp-in-string 1405 (concat "\\b" (car abbrev) "\\.") 1406 (cadr abbrev) 1407 content t))))) 1408 content)) 1409 1410 (defun lexic-format-webster-diacritics (pronunciation) 1411 "Replace ascii pronounciation symbols in PRONUNCIATION with unicode equivalents." 1412 (let ((diacritics 1413 '(("[,C]" "Ç") 1414 ("\"u" "ü") ; uum 1415 ("'e" "é") ; eacute 1416 ("\\^a" "â") ; acir 1417 ("\"a" "ä") ; aum 1418 ("`a" "à") ; agrave 1419 ("\\*a" "å") ; aring 1420 ("\\*u" "ů") ; uring 1421 (",c" "ç") ; ccedil 1422 ("cced" "ç") 1423 ("\\^e" "ê") ; ecir 1424 ("\"e" "ë") ; eum 1425 ("`e" "è") ; egrave 1426 ("\"i" "ï") ; ium 1427 ("\\^i" "î") ; icir 1428 ("`i" "ì") ; igrave 1429 ("\"A" "Ä") ; Aum 1430 ("\\*A" "Å") ; Aring 1431 ("'E" "È") ; Eacute 1432 ("ae" "æ") ; ae 1433 ("AE" "Æ") ; AE 1434 ("\\^o" "ô") ; ocir 1435 ("\"o" "ö") ; oum 1436 ("`o" "ò") ; ograve 1437 ("'o" "ó") ; oacute 1438 ("Oacute" "Ó") 1439 ("\\^u" "û") ; ucir 1440 ("`u" "ù") ; ugrave 1441 ("'u" "ú") ; uacute 1442 ("\"y" "ÿ") ; yum 1443 ("\"O" "Ö") ; Oum 1444 ("\"U" "Ü") 1445 ("pound" "£") 1446 ("'a" "á") ; aacute 1447 ("'i" "í") ; iacute 1448 ("frac23" "⅔") 1449 ("frac13" "⅓") 1450 ("frac12" "½") 1451 ("frac14" "¼") 1452 ("\\?" "�") ; Place-holder for unknown or illegible character. 1453 ("hand" "☞") ; pointing hand (printer's u"fist") ; hand ; and 1454 ("\\.\\.\\." "…") 1455 ("\\*\\*\\*\\*\\*\\*\\*\\*" "✶") 1456 ("sect" "§") ; sect 1457 ("=a" "ā") ; amac 1458 ("ng" "ṉ") ; u"n sub-macron" ; nsm 1459 ("sharp" "♯") ; sharp 1460 ("flat" "♭") ; flat 1461 ("th" "th") ; th 1462 ("=i" "ī") ; imac 1463 ("imac" "ī") ; imac 1464 ("=e" "ē") ; emac 1465 ("\\.d" "ḍ") ; Sanskrit/Tamil d dot 1466 ("\\.n" "ṇ") ; Sanskrit/Tamil n dot ; nsdot 1467 ("\\.t" "ṭ") ; Sanskrit/Tamil t dot ; tsdot 1468 ("a\\^" "ă") ; acr 1469 ("e\\^" "ĕ") ; ecr 1470 ("i\\^" "ĭ") ; icr 1471 ("o\\^" "ŏ") ; ocr 1472 ("!o" "ǒ") 1473 ("OE" "Œ") ; OE 1474 ("oe" "œ") ; oe 1475 ("=O" "Ō") ; Omac 1476 ("=o" "ō") ; omac 1477 ("=u" "ū") ; umac 1478 ("ocar" "ǒ") ; o hacek 1479 ("=ae" "ǣ") ; aemac 1480 ("u\\^" "ŭ") ; ucr 1481 ("a\\^" "ă") 1482 ("=y" "ȳ") ; ymac 1483 ("asl" "a") ; FIXME: a u"semilong" (has a macron above with a short 1484 ("-e" "e") ; FIXME: e u"semilong" ; esl 1485 ("-i" "i") ; FIXME: i u"semilong" ; isl 1486 ("-o" "o") ; FIXME: o u"semilong" ; osl 1487 ("-u" "u") ; FIXME: u u"semilong" ; usl 1488 ("-n" "ṉ") ; nsmac 1489 ("\\.a" "ȧ") ; a with dot above ; adot 1490 ("\\.c" "ċ") ; cdot 1491 ("\\.h" "ḥ") ; hsdot 1492 ("\\.m" "ṃ") ; msdot 1493 ("\\.u" "ụ") ; usdot 1494 ("\\.z" "ẓ") ; zsdot 1495 ("Eth" "Ð") ; EDH 1496 ("eth" "ð") ; edh 1497 ("thorn" "þ") ; thorn 1498 ("~a" "ã") ; atil 1499 ("~e" "ẽ") ; etil 1500 ("itil" "ĩ") 1501 ("otil" "õ") 1502 ("util" "ũ") 1503 ("~n" "ñ") ; ntil 1504 ("Atil" "Ã") 1505 ("Etil" "Ẽ") 1506 ("Itil" "Ĩ") 1507 ("Otil" "Õ") 1508 ("Util" "Ũ") 1509 ("~N" "Ñ") ; Ntil 1510 ("\\.n" "ṅ") ; ndot 1511 ("\\.r" "ṛ") ; rsdot 1512 ("yogh" "ȝ") ; yogh 1513 ("deg" "°") 1514 ("middot" "•") 1515 ("root" "√") 1516 ;; Greek letters 1517 ("alpha" "α") 1518 ("beta" "β") 1519 ("gamma" "γ") 1520 ("delta" "δ") 1521 ("epsilon" "ε") 1522 ("zeta" "ζ") 1523 ("eta" "η") 1524 ("theta" "θ") 1525 ("iota" "ι") 1526 ("approx" "κ") ; ap 1527 ("lambda" "λ") 1528 ("mu" "μ") 1529 ("nu" "ν") 1530 ("xi" "ξ") 1531 ("omicron" "ο") 1532 ("pi" "π") 1533 ("rho" "ρ") 1534 ("sigma" "σ") 1535 ("sigmat" "ς") 1536 ("tau" "τ") 1537 ("upsilon" "υ") 1538 ("phi" "φ") 1539 ("chi" "χ") 1540 ("psi" "ψ") 1541 ("omega" "ω") 1542 ("digamma" "ϝ") 1543 ("ALPHA" "Α") 1544 ("BETA" "Β") 1545 ("Gamma" "Γ") ; GAMMA 1546 ("Delta" "Δ") ; DELTA 1547 ("EPSILON" "Ε") 1548 ("ZETA" "Ζ") 1549 ("ETA" "Η") 1550 ("Theta" "Θ") ; THETA 1551 ("IOTA" "Ι") 1552 ("KAPPA" "Κ") 1553 ("Lambda" "Λ") ; LAMBDA 1554 ("MU" "Μ") 1555 ("NU" "Ν") 1556 ("XI" "Ξ") 1557 ("Omicron" "Ο") ; OMICRON 1558 ("Pi" "Π") ; PI 1559 ("RHO" "Ρ") 1560 ("Sigma" "Σ") ; SIGMA 1561 ("Tau" "Τ") ; TAU 1562 ("Upsilon" "Υ") ; UPSILON 1563 ("PHI" "Φ") 1564 ("Chi" "Χ") ; CHI 1565 ("PSI" "Ψ") 1566 ("Omega" "Ω") ; OMEGA 1567 ;; FIXME: Vowels with a double dot below. There`s nothing suitable in the Unicode 1568 ("add" "a") 1569 ("udd" "u") 1570 ("ADD" "A") 1571 ("UDD" "U") 1572 ;; Quotes 1573 ("dagger" "†") 1574 ("dag" "†") 1575 ("u\\^" "§") ; par 1576 ("and" "and") 1577 ("or" "or") 1578 ("sec" "˝") 1579 ("[,C]" "Ç") 1580 ("\"u" "ü") ; uum 1581 ("'e" "é") ; eacute 1582 ("\\^a" "â") ; acir 1583 ("\"a" "ä") ; aum 1584 ("`a" "à") ; agrave 1585 ("\\*a" "å") ; aring 1586 ("\\*u" "ů") ; uring 1587 (",c" "ç") ; ccedil 1588 ("cced" "ç") 1589 ("\\^e" "ê") ; ecir 1590 ("\"e" "ë") ; eum 1591 ("`e" "è") ; egrave 1592 ("\"i" "ï") ; ium 1593 ("\\^i" "î") ; icir 1594 ("`i" "ì") ; igrave 1595 ("\"A" "Ä") ; Aum 1596 ("\\*A" "Å") ; Aring 1597 ("'E" "È") ; Eacute 1598 ("ae" "æ") ; ae 1599 ("AE" "Æ") ; AE 1600 ("\\^o" "ô") ; ocir 1601 ("\"o" "ö") ; oum 1602 ("`o" "ò") ; ograve 1603 ("'o" "ó") ; oacute 1604 ("Oacute" "Ó") 1605 ("\\^u" "û") ; ucir 1606 ("`u" "ù") ; ugrave 1607 ("'u" "ú") ; uacute 1608 ("\"y" "ÿ") ; yum 1609 ("\"O" "Ö") ; Oum 1610 ("\"U" "Ü") 1611 ("pound" "£") 1612 ("'a" "á") ; aacute 1613 ("'i" "í") ; iacute 1614 ("frac23" "⅔") 1615 ("frac13" "⅓") 1616 ("frac12" "½") 1617 ("frac14" "¼") 1618 ;; ("\\?" "�") ; Place-holder for unknown or illegible character. 1619 ("hand" "☞") ; pointing hand (printer's u"fist") ; hand ; and 1620 ("sect" "§") ; sect 1621 ("=a" "ā") ; amac 1622 ("ng" "ṉ") ; u"n sub-macron" ; nsm 1623 ("sharp" "♯") ; sharp 1624 ("flat" "♭") ; flat 1625 ("th" "th") ; th 1626 ("=i" "ī") ; imac 1627 ("=e" "ē") ; emac 1628 ("\\.d" "ḍ") ; Sanskrit/Tamil d dot 1629 ("\\.n" "ṇ") ; Sanskrit/Tamil n dot ; nsdot 1630 ("\\.t" "ṭ") ; Sanskrit/Tamil t dot ; tsdot 1631 ("a\\^" "ă") ; acr 1632 ("e\\^" "ĕ") ; ecr 1633 ("i\\^" "ĭ") ; icr 1634 ("o\\^" "ŏ") ; ocr 1635 ("!o" "ǒ") 1636 ("OE" "Œ") ; OE 1637 ("oe" "œ") ; oe 1638 ("=O" "Ō") ; Omac 1639 ("=o" "ō") ; omac 1640 ("=u" "ū") ; umac 1641 ("ocar" "ǒ") ; o hacek 1642 ("=ae" "ǣ") ; aemac 1643 ("u\\^" "ŭ") ; ucr 1644 ("a\\^" "ă") 1645 ("=y" "ȳ") ; ymac 1646 ("asl" "a") ; FIXME: a u"semilong" (has a macron above with a short 1647 ("-e" "e") ; FIXME: e u"semilong" ; esl 1648 ("-i" "i") ; FIXME: i u"semilong" ; isl 1649 ("-o" "o") ; FIXME: o u"semilong" ; osl 1650 ("-u" "u") ; FIXME: u u"semilong" ; usl 1651 ("-n" "ṉ") ; nsmac 1652 ("\\.a" "ȧ") ; a with dot above ; adot 1653 ("\\.c" "ċ") ; cdot 1654 ("\\.h" "ḥ") ; hsdot 1655 ("\\.m" "ṃ") ; msdot 1656 ("\\.u" "ụ") ; usdot 1657 ("\\.z" "ẓ") ; zsdot 1658 ("Eth" "Ð") ; EDH 1659 ("eth" "ð") ; edh 1660 ("thorn" "þ") ; thorn 1661 ("~a" "ã") ; atil 1662 ("~e" "ẽ") ; etil 1663 ("itil" "ĩ") 1664 ("otil" "õ") 1665 ("util" "ũ") 1666 ("~n" "ñ") ; ntil 1667 ("Atil" "Ã") 1668 ("Etil" "Ẽ") 1669 ("Itil" "Ĩ") 1670 ("Otil" "Õ") 1671 ("Util" "Ũ") 1672 ("~N" "Ñ") ; Ntil 1673 ("\\.n" "ṅ") ; ndot 1674 ("\\.r" "ṛ") ; rsdot 1675 ("yogh" "ȝ") ; yogh 1676 ("deg" "°") 1677 ("middot" "•") 1678 ("root" "√") 1679 ;; Greek letters 1680 ("alpha" "α") 1681 ("beta" "β") 1682 ("gamma" "γ") 1683 ("delta" "δ") 1684 ("epsilon" "ε") 1685 ("zeta" "ζ") 1686 ("eta" "η") 1687 ("theta" "θ") 1688 ("iota" "ι") 1689 ("approx" "κ") ; ap 1690 ("lambda" "λ") 1691 ("mu" "μ") 1692 ("nu" "ν") 1693 ("xi" "ξ") 1694 ("omicron" "ο") 1695 ("pi" "π") 1696 ("rho" "ρ") 1697 ("sigma" "σ") 1698 ("sigmat" "ς") 1699 ("tau" "τ") 1700 ("upsilon" "υ") 1701 ("phi" "φ") 1702 ("chi" "χ") 1703 ("psi" "ψ") 1704 ("omega" "ω") 1705 ("digamma" "ϝ") 1706 ("ALPHA" "Α") 1707 ("BETA" "Β") 1708 ("Gamma" "Γ") ; GAMMA 1709 ("Delta" "Δ") ; DELTA 1710 ("EPSILON" "Ε") 1711 ("ZETA" "Ζ") 1712 ("ETA" "Η") 1713 ("Theta" "Θ") ; THETA 1714 ("IOTA" "Ι") 1715 ("KAPPA" "Κ") 1716 ("Lambda" "Λ") ; LAMBDA 1717 ("MU" "Μ") 1718 ("NU" "Ν") 1719 ("XI" "Ξ") 1720 ("Omicron" "Ο") ; OMICRON 1721 ("Pi" "Π") ; PI 1722 ("RHO" "Ρ") 1723 ("Sigma" "Σ") ; SIGMA 1724 ("Tau" "Τ") ; TAU 1725 ("Upsilon" "Υ") ; UPSILON 1726 ("PHI" "Φ") 1727 ("Chi" "Χ") ; CHI 1728 ("PSI" "Ψ") 1729 ("Omega" "Ω") ; OMEGA 1730 ;; FIXME: Vowels with a double dot below. There`s nothing suitable in the Unicode 1731 ("add" "a") 1732 ("udd" "u") 1733 ("ADD" "A") 1734 ("UDD" "U") 1735 ;; Quotes 1736 ("dagger" "†") 1737 ("dag" "†") 1738 ("u\\^" "§") ; par 1739 ("and" "and") 1740 ("or" "or") 1741 ("times" "×") 1742 ("sec" "˝")))) 1743 (setq pronunciation 1744 (replace-regexp-in-string 1745 "\\.\\.\\." "…" 1746 (replace-regexp-in-string 1747 "\\*\\*\\*\\*\\*\\*\\*\\*" "✶" 1748 pronunciation))) 1749 (dolist (dcrt diacritics) 1750 (setq pronunciation (replace-regexp-in-string 1751 (concat "\\[" (car dcrt) "\\]") 1752 (cadr dcrt) 1753 pronunciation t))) 1754 pronunciation)) 1755 1756 (defun lexic-format-reflow-text (text max-width &optional min-width initial-colunm indent sepregex) 1757 "Add newlines as required to ensure that TEXT never exceeds MAX-WIDTH columns. 1758 1759 Can also set a MIN-WIDTH for new lines of text created by a line break, 1760 an INITIAL-COLUNM that the text starts at, and an INDENT string to be inserted 1761 after every line break. 1762 1763 When regex SEPREGEX is provided, it is used to detect word separators. 1764 It is \"[ ,.]\" by default." 1765 (let* ((initial-col (or initial-colunm 0)) 1766 (min-width (or min-width 1)) 1767 (indent (or indent "")) 1768 (sepregex (or sepregex "[ ,.]")) 1769 (line-regex (format "\\(\\`.\\{%d,%d\\}\\(?:%s\\(?:.\\{1,%d\\}\\'\\)?\\|\\'\\)\\|.\\{%d\\}\\)\\(.*\\)" 1770 min-width (- max-width (length indent)) 1771 sepregex min-width (- max-width (length indent)))) 1772 reflowed-text) 1773 (setq text (replace-regexp-in-string "[[:space:]]+" " " text)) 1774 (setq text 1775 (if (> initial-col max-width) 1776 (replace-regexp-in-string "\\` " "" text) 1777 (replace-regexp-in-string 1778 (format "\\`.\\{%d,%d\\}%s\\(?:.\\{1,%d\\}\\'\\)?\\|\\`" 1779 (min min-width (- max-width initial-col)) (- max-width initial-col) 1780 sepregex min-width) 1781 (lambda (match) 1782 (setq reflowed-text match) 1783 "") 1784 text))) 1785 (while (not (string-empty-p text)) 1786 (setq text 1787 (if (<= (length text) max-width) 1788 (progn (setq reflowed-text (concat reflowed-text 1789 (unless (string-empty-p reflowed-text) 1790 (concat "\n" indent)) 1791 text)) "") 1792 (replace-regexp-in-string 1793 line-regex 1794 (lambda (match) 1795 (setq reflowed-text (concat reflowed-text "\n" indent (match-string 1 match))) 1796 (match-string 2 match)) 1797 text)))) 1798 reflowed-text)) 1799 1800 (defun lexic-format-online-etym (entry &optional _expected-word) 1801 "Make an html ENTRY look nice. 1802 Designed for an export of Douglas Harper's Online Etymology Dictionary, 1803 collected using https://framagit.org/tuxor1337/dictmaster." 1804 (thread-last 1805 (string-join 1806 (delq nil 1807 (mapcar 1808 (lambda (e) 1809 (when (string= (plist-get e :dict) 1810 (plist-get entry :dict)) 1811 (plist-get e :info))) 1812 (lexic-parse-results 1813 (lexic-oneshot-lookup 1814 (replace-regexp-in-string " ?(.*)" " (*)" (plist-get entry :word)) ; lexic accepts a glob 1815 t (list "-0" "-u" (plist-get entry :dict))))))) 1816 (replace-regexp-in-string 1817 "\\(?:\\`\\|\n\n\\)<b>\\(.+?\\) (\\(.+?\\)\\([0-9]+\\)?)</b> ?" 1818 (lambda (match) 1819 (let ((word (match-string 1 match)) 1820 (pos (lexic-format-expand-abbreviations (match-string 2 match))) 1821 (index (match-string 3 match))) 1822 (concat "\n\n\u200B\u200B\u200B" 1823 (propertize word 'face 'bold) 1824 " " 1825 (propertize pos 'face '(bold font-lock-keyword-face)) 1826 (when index 1827 (propertize (concat " " index) 'face '(italic font-lock-doc-face))) 1828 "\u2008\n\n")))) 1829 (replace-regexp-in-string 1830 "<i>\\(.*?\\)</i>" 1831 (lambda (match) (propertize (match-string 1 match) 'face 'italic))) 1832 (replace-regexp-in-string 1833 "<b>\\(.*?\\)</b>" 1834 (lambda (match) (propertize (match-string 1 match) 'face 'bold))) 1835 (replace-regexp-in-string 1836 "<strong>\\(.*?\\)</strong>" 1837 (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-constant-face))) 1838 (replace-regexp-in-string 1839 "<a href=\".*?\">\\(.*?\\)</a>" 1840 (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-keyword-face))) 1841 (replace-regexp-in-string 1842 "<span style=\".*?\">\\(.*?\\)</span>" 1843 (lambda (match) (propertize (match-string 1 match) 'face 'font-lock-doc-face))) 1844 (replace-regexp-in-string 1845 "[0-9]\\{4\\}s?\\|[0-9]+c\\." 1846 (lambda (match) (propertize match 'face 'font-lock-string-face))) 1847 (replace-regexp-in-string 1848 "<span>\\(.*?\\)</span>\\( (.+?)\\)?" 1849 (lambda (match) 1850 (let ((linked (match-string 1 match)) 1851 (pos (lexic-format-expand-abbreviations (match-string 2 match)))) 1852 (concat 1853 (propertize linked 'face 'font-lock-keyword-face) 1854 (when pos (propertize (replace-regexp-in-string "\\([0-9]+\\))" " \\1)" pos) 1855 'face '(bold diff-context))))))) 1856 (replace-regexp-in-string 1857 "<blockquote>\\(.+?\\) ?\\[\\(.+\\)\\]</blockquote>" 1858 (lambda (match) 1859 (concat "❝" 1860 (propertize 1861 (lexic-format-reflow-text 1862 (match-string 1 match) 80 5 1 " ") 1863 'face 'diff-context) 1864 "❞\n" 1865 (propertize (concat " ──" 1866 (lexic-format-reflow-text (match-string 2 match) 1867 75 5 3 " ")) 1868 'face '(italic font-lock-type-face))))) 1869 (replace-regexp-in-string "<br/>\n?<br/>" "\n") 1870 (replace-regexp-in-string 1871 "<p>\\(.*?\\)</p>" 1872 (lambda (match) 1873 (concat 1874 (lexic-format-reflow-text (match-string 1 match) 1875 80 5) 1876 "\n"))) 1877 (replace-regexp-in-string "</?p>" "") ; any straggling pars 1878 (replace-regexp-in-string 1879 "^.\\{86,\\}" 1880 (lambda (match) 1881 (lexic-format-reflow-text match 80 5))))) 1882 1883 (defun lexic-format-element (entry &optional _expected-word) 1884 "Make an ENTRY for an element Look nice. 1885 Based on http://download.huzheng.org/dict.org/stardict-dictd_www.dict.org_elements-2.4.2.tar.bz2." 1886 (replace-regexp-in-string 1887 "^\\([a-z]+\\) 1888 Symbol: \\([A-Za-z]+\\) 1889 Atomic number: \\([0-9]+\\) 1890 Atomic weight: \\((?[0-9.]+)?\\)" 1891 (lambda (match) 1892 (let ((element (match-string 1 match)) 1893 (symbol (match-string 2 match)) 1894 (number (match-string 3 match)) 1895 (weight (match-string 4 match))) 1896 (format 1897 "┌────────────────┐ 1898 │ %-3s %10s │ 1899 │ %s %11s │ 1900 └────────────────┘ 1901 " 1902 (propertize number 'face 'font-lock-function-name-face) 1903 (propertize weight 'face 'font-lock-comment-face) 1904 (propertize symbol 'face '(bold font-lock-keyword-face)) 1905 (propertize element 'face 'font-lock-string-face)))) 1906 (plist-get entry :info))) 1907 1908 (defun lexic-format-soule (entry &optional _expected-word) 1909 "Format an ENTRY for WORD in Soule's Dictionary of English Synonyms. 1910 Designed using http://download.huzheng.org/bigdict/stardict-Soule_s_Dictionary_of_English_Synonyms-2.4.2.tar.bz2." 1911 (thread-last (plist-get entry :info) 1912 (replace-regexp-in-string ; categories 1913 "^\\([IVX]+\\. \\)?\\([a-z.;& ]+\\)" 1914 (lambda (match) 1915 (concat 1916 "\u200B\u200B\u200B" 1917 (when case-fold-search 1918 (propertize 1919 'face '(bold font-lock-constant-face))) 1920 (propertize (lexic-format-expand-abbreviations (match-string 2 match)) 1921 'face '(bold font-lock-keyword-face)) 1922 "\u2008"))) 1923 (replace-regexp-in-string 1924 "^\\([0-9]+\\.\\) \n\\([^,.]*,?\\)" 1925 (lambda (match) 1926 (concat 1927 "\u200B\u200B\u200B\u200B" 1928 (propertize (match-string 1 match) 1929 'face '(bold font-lock-string-face)) 1930 " " 1931 (match-string 2 match) 1932 "\u2008"))) 1933 (replace-regexp-in-string 1934 "^\\(.\\{81\\}\\)" 1935 (lambda (match) 1936 (lexic-format-reflow-text match 80 1 0 " " "[, ]"))) 1937 (replace-regexp-in-string 1938 "," 1939 (propertize "," 'face 'font-lock-type-face)))) 1940 1941 ;;;###autoload 1942 (defun lexic-dictionary-help () 1943 "Show the Lexic help page." 1944 (interactive) 1945 (let ((dict-help-buf (get-buffer-create "*lexic-dict-help*"))) 1946 (with-current-buffer dict-help-buf 1947 (setq buffer-read-only t) 1948 (with-silent-modifications 1949 (erase-buffer) 1950 (insert "#+title: Lexic Dictionary Help 1951 #+author: TEC 1952 1953 * sdcv 1954 1955 First you want to make sure you have the *stardict* CLI tool =sdcv=. 1956 It should be available in [[https://repology.org/project/sdcv/versions][many package repositories]] under that name. 1957 1958 Not that by itself =sdcv= does not come with any dictionaries, you'll need to install those yourself. 1959 1960 * Downloading dictionaries 1961 1962 You can find quite a few dictionaries on http://download.huzheng.org/dict.org/ 1963 and http://download.huzheng.org/bigdict/. By default, Lexic will provide nice 1964 formatting for some of these. Namely: 1965 1966 | Dictionary | Recognised ~bookname~ | 1967 |------------------------+------------------------------------------------| 1968 | [[http://download.huzheng.org/dict.org/stardict-dictd-web1913-2.4.2.tar.bz2.][Webster 1913]] | =Webster's Revised Unabridged Dictionary (1913)= | 1969 | [[http://download.huzheng.org/dict.org/stardict-dictd_www.dict.org_elements-2.4.2.tar.bz2][Elements]] | =Elements database= | 1970 | Hitchcock's Bible Name | =Hitchcock's Bible Names Dictionary= | 1971 | Online Etymology | =Online Etymology Dictionary= | 1972 | [[http://download.huzheng.org/bigdict/stardict-Soule_s_Dictionary_of_English_Synonyms-2.4.2.tar.bz2][Soule's Synonyms]] | =Soule's Dictionary of English Synonyms= | 1973 1974 The stardict form of the /Online Etymology Dictionary/ can be created by running 1975 [[https://framagit.org/tuxor1337/dictmaster][dictmaster]]. Unfortunately I do not know of any nice hosted result of this. 1976 1977 * Installing dictionaries 1978 1979 By default =sdcv= will look for dictionaries in =$HOME/.stardict/dic=, (do take note 1980 of the =dic= part of the path) but this can be changed by setting environment 1981 variable =STARDICT_DATA_DIR=. 1982 1983 I recommend creating a folder for each dictionary in =STARDICT_DATA_DIR=, for 1984 example: 1985 1986 #+begin_example 1987 $STARDICT_DATA_DIR 1988 ├── elements 1989 │ └── ... 1990 ├── etymology 1991 │ └── ... 1992 ├── hitchcock 1993 │ └── ... 1994 ├── synonyms 1995 │ ├── SoulesSynonyms.dict.dz 1996 │ ├── SoulesSynonyms.idx 1997 │ └── SoulesSynonyms.ifo 1998 └── webster 1999 └── ... 2000 #+end_example 2001 2002 A particular dictionary is composed of a few files: 2003 + =DICT.dict.dz= 2004 + =DICT.idx= 2005 + =DICT.ifo= 2006 2007 For our purposes, we only care about the =.ifo= (info) file. It should look 2008 something like this. 2009 #+begin_example 2010 StarDict's dict ifo file 2011 version=2.4.2 2012 wordcount=160161 2013 idxfilesize=3024035 2014 bookname=Webster's Revised Unabridged Dictionary (1913) 2015 description=Connoisseur's reference to American English 2016 date=2007.8.28 2017 sametypesequence=m 2018 #+end_example 2019 2020 We are particularly interested in the ~bookname=~ line, as if you have one of the 2021 [[Downloading dictionaries][recognised dictionaries]] in order to get the nice formatting you need to set the 2022 ~bookname~ to the recognised value. 2023 2024 * Using Lexic 2025 2026 Once you have =sdcv= and some dictionaries installed, just go ahead and try =M-x 2027 lexic-search= 🙂.")) 2028 (goto-char (point-min)) 2029 (org-mode) 2030 (set-transient-map special-mode-map t)) 2031 (switch-to-buffer-other-window dict-help-buf))) 2032 2033 (provide 'lexic) 2034 ;;; lexic.el ends here