dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

slynk-source-path-parser.lisp (9688B)


      1 ;;;; Source-paths
      2 
      3 ;;; CMUCL/SBCL use a data structure called "source-path" to locate
      4 ;;; subforms.  The compiler assigns a source-path to each form in a
      5 ;;; compilation unit.  Compiler notes usually contain the source-path
      6 ;;; of the error location.
      7 ;;;
      8 ;;; Compiled code objects don't contain source paths, only the
      9 ;;; "toplevel-form-number" and the (sub-) "form-number".  To get from
     10 ;;; the form-number to the source-path we need the entire toplevel-form
     11 ;;; (i.e. we have to read the source code).  CMUCL has already some
     12 ;;; utilities to do this translation, but we use some extended
     13 ;;; versions, because we need more exact position info.  Apparently
     14 ;;; Hemlock is happy with the position of the toplevel-form; we also
     15 ;;; need the position of subforms.
     16 ;;;
     17 ;;; We use a special readtable to get the positions of the subforms.
     18 ;;; The readtable stores the start and end position for each subform in
     19 ;;; hashtable for later retrieval.
     20 ;;;
     21 ;;; This code has been placed in the Public Domain.  All warranties
     22 ;;; are disclaimed.
     23 
     24 ;;; Taken from slynk-cmucl.lisp, by Helmut Eller
     25 
     26 (defpackage slynk-source-path-parser
     27   (:use cl)
     28   (:export
     29    read-source-form
     30    source-path-string-position
     31    source-path-file-position
     32    source-path-source-position
     33 
     34    sexp-in-bounds-p
     35    sexp-ref)
     36   (:shadow ignore-errors))
     37 
     38 (in-package slynk-source-path-parser)
     39 
     40 ;; Some test to ensure the required conformance
     41 (let ((rt (copy-readtable nil)))
     42   (assert (or (not (get-macro-character #\space rt))
     43 	      (nth-value 1 (get-macro-character #\space rt))))
     44   (assert (not (get-macro-character #\\ rt))))
     45 
     46 (eval-when (:compile-toplevel)
     47   (defmacro ignore-errors (&rest forms)
     48     ;;`(progn . ,forms) ; for debugging
     49     `(cl:ignore-errors . ,forms)))
     50 
     51 (defun make-sharpdot-reader (orig-sharpdot-reader)
     52   (lambda (s c n)
     53     ;; We want things like M-. to work regardless of any #.-fu in
     54     ;; the source file that is to be visited. (For instance, when a
     55     ;; file contains #. forms referencing constants that do not
     56     ;; currently exist in the image.)
     57     (ignore-errors (funcall orig-sharpdot-reader s c n))))
     58 
     59 (defun make-source-recorder (fn source-map)
     60   "Return a macro character function that does the same as FN, but
     61 additionally stores the result together with the stream positions
     62 before and after of calling FN in the hashtable SOURCE-MAP."
     63   (lambda (stream char)
     64     (let ((start (1- (file-position stream)))
     65 	  (values (multiple-value-list (funcall fn stream char)))
     66 	  (end (file-position stream)))
     67       #+(or)
     68       (format t "[~D \"~{~A~^, ~}\" ~D ~D ~S]~%"
     69 	      start values end (char-code char) char)
     70       (when values
     71         (destructuring-bind (&optional existing-start &rest existing-end)
     72             (car (gethash (car values) source-map))
     73           ;; Some macros may return what a sub-call to another macro
     74           ;; produced, e.g. "#+(and) (a)" may end up saving (a) twice,
     75           ;; once from #\# and once from #\(. If the saved form
     76           ;; is a subform, don't save it again.
     77           (unless (and existing-start existing-end
     78                        (<= start existing-start end)
     79                        (<= start existing-end end))
     80             (push (cons start end) (gethash (car values) source-map)))))
     81       (values-list values))))
     82 
     83 (defun make-source-recording-readtable (readtable source-map)
     84   (declare (type readtable readtable) (type hash-table source-map))
     85   "Return a source position recording copy of READTABLE.
     86 The source locations are stored in SOURCE-MAP."
     87   (flet ((install-special-sharpdot-reader (rt)
     88 	   (let ((fun (ignore-errors
     89 			(get-dispatch-macro-character #\# #\. rt))))
     90 	     (when fun
     91 	       (let ((wrapper (make-sharpdot-reader fun)))
     92 		 (set-dispatch-macro-character #\# #\. wrapper rt)))))
     93 	 (install-wrappers (rt)
     94 	   (dotimes (code 128)
     95 	     (let ((char (code-char code)))
     96 	       (multiple-value-bind (fun nt) (get-macro-character char rt)
     97 		 (when fun
     98 		   (let ((wrapper (make-source-recorder fun source-map)))
     99 		     (set-macro-character char wrapper nt rt))))))))
    100     (let ((rt (copy-readtable readtable)))
    101       (install-special-sharpdot-reader rt)
    102       (install-wrappers rt)
    103       rt)))
    104 
    105 ;; FIXME: try to do this with *READ-SUPPRESS* = t to avoid interning.
    106 ;; Should be possible as we only need the right "list structure" and
    107 ;; not the right atoms.
    108 (defun read-and-record-source-map (stream)
    109   "Read the next object from STREAM.
    110 Return the object together with a hashtable that maps
    111 subexpressions of the object to stream positions."
    112   (let* ((source-map (make-hash-table :test #'eq))
    113          (*readtable* (make-source-recording-readtable *readtable* source-map))
    114 	 (*read-suppress* nil)
    115 	 (start (file-position stream))
    116 	 (form (ignore-errors (read stream)))
    117 	 (end (file-position stream)))
    118     ;; ensure that at least FORM is in the source-map
    119     (unless (gethash form source-map)
    120       (push (cons start end) (gethash form source-map)))
    121     (values form source-map)))
    122 
    123 (defun starts-with-p (string prefix)
    124   (declare (type string string prefix))
    125   (not (mismatch string prefix
    126 		 :end1 (min (length string) (length prefix))
    127 		 :test #'char-equal)))
    128 
    129 (defun extract-package (line)
    130   (declare (type string line))
    131   (let ((name (cadr (read-from-string line))))
    132     (find-package name)))
    133 
    134 #+(or)
    135 (progn
    136   (assert (extract-package "(in-package cl)"))
    137   (assert (extract-package "(cl:in-package cl)"))
    138   (assert (extract-package "(in-package \"CL\")"))
    139   (assert (extract-package "(in-package #:cl)")))
    140 
    141 ;; FIXME: do something cleaner than this.
    142 (defun readtable-for-package (package)
    143   ;; KLUDGE: due to the load order we can't reference the slynk
    144   ;; package.
    145   (funcall (read-from-string "slynk::guess-buffer-readtable")
    146            (string-upcase (package-name package))))
    147 
    148 ;; Search STREAM for a "(in-package ...)" form.  Use that to derive
    149 ;; the values for *PACKAGE* and *READTABLE*.
    150 ;;
    151 ;; IDEA: move GUESS-READER-STATE to slynk.lisp so that all backends
    152 ;; use the same heuristic and to avoid the need to access
    153 ;; slynk::guess-buffer-readtable from here.
    154 (defun guess-reader-state (stream)
    155   (let* ((point (file-position stream))
    156 	 (pkg *package*))
    157     (file-position stream 0)
    158     (loop for read-line = (read-line stream nil nil)
    159           for line = (and read-line
    160                           (string-trim '(#\Space #\Tab #\Linefeed #\Page #\Return #\Rubout)
    161                                        read-line))
    162           do
    163              (when (not line) (return))
    164              (when (or (starts-with-p line "(in-package ")
    165                        (starts-with-p line "(cl:in-package "))
    166                (let ((p (extract-package line)))
    167                  (when p (setf pkg p)))
    168                (return)))
    169     (file-position stream point)
    170     (values (readtable-for-package pkg) pkg)))
    171 
    172 (defun skip-whitespace (stream)
    173   (peek-char t stream nil nil))
    174 
    175 ;; Skip over N toplevel forms.
    176 (defun skip-toplevel-forms (n stream)
    177   (let ((*read-suppress* t))
    178     (dotimes (i n)
    179       (read stream))
    180     (skip-whitespace stream)))
    181 
    182 (defun read-source-form (n stream)
    183   "Read the Nth toplevel form number with source location recording.
    184 Return the form and the source-map."
    185   (multiple-value-bind (*readtable* *package*) (guess-reader-state stream)
    186     (let (#+sbcl
    187           (*features* (append *features*
    188                               (symbol-value (find-symbol "+INTERNAL-FEATURES+" 'sb-impl)))))
    189       (skip-toplevel-forms n stream)
    190       (read-and-record-source-map stream))))
    191 
    192 (defun source-path-stream-position (path stream)
    193   "Search the source-path PATH in STREAM and return its position."
    194   (check-source-path path)
    195   (destructuring-bind (tlf-number . path) path
    196     (multiple-value-bind (form source-map) (read-source-form tlf-number stream)
    197       (source-path-source-position (cons 0 path) form source-map))))
    198 
    199 (defun check-source-path (path)
    200   (unless (and (consp path)
    201                (every #'integerp path))
    202     (error "The source-path ~S is not valid." path)))
    203 
    204 (defun source-path-string-position (path string)
    205   (with-input-from-string (s string)
    206     (source-path-stream-position path s)))
    207 
    208 (defun source-path-file-position (path filename)
    209   ;; We go this long way round, and don't directly operate on the file
    210   ;; stream because FILE-POSITION (used above) is not totally savy even
    211   ;; on file character streams; on SBCL, FILE-POSITION returns the binary
    212   ;; offset, and not the character offset---screwing up on Unicode.
    213   (let ((toplevel-number (first path))
    214 	(buffer))
    215     (with-open-file (file filename)
    216       (skip-toplevel-forms (1+ toplevel-number) file)
    217       (let ((endpos (file-position file)))
    218 	(setq buffer (make-array (list endpos) :element-type 'character
    219 				 :initial-element #\Space))
    220 	(assert (file-position file 0))
    221 	(read-sequence buffer file :end endpos)))
    222     (source-path-string-position path buffer)))
    223 
    224 (defgeneric sexp-in-bounds-p (sexp i)
    225   (:method ((list list) i)
    226     (< i (loop for e on list
    227                count t)))
    228   (:method ((sexp t) i) nil))
    229 
    230 (defgeneric sexp-ref (sexp i)
    231   (:method ((s list) i) (elt s i)))
    232 
    233 (defun source-path-source-position (path form source-map)
    234   "Return the start position of PATH from FORM and SOURCE-MAP.  All
    235 subforms along the path are considered and the start and end position
    236 of the deepest (i.e. smallest) possible form is returned."
    237   ;; compute all subforms along path
    238   (let ((forms (loop for i in path
    239 		     for f = form then (if (sexp-in-bounds-p f i)
    240 					   (sexp-ref f i))
    241 		     collect f)))
    242     ;; select the first subform present in source-map
    243     (loop for form in (nreverse forms)
    244 	  for ((start . end) . rest) = (gethash form source-map)
    245 	  when (and start end (not rest))
    246 	  return (return (values start end)))))