X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=emacs_el%2Fdna-mode.el;fp=emacs_el%2Fdna-mode.el;h=0000000000000000000000000000000000000000;hb=57355d630c5b9472f9b77eddfb17886931784080;hp=5433ea1061d6a8a4163c6639b9f192145258ca83;hpb=317502fdfe0ea759f0d166e615a33be59a1f7fba;p=lib.git diff --git a/emacs_el/dna-mode.el b/emacs_el/dna-mode.el deleted file mode 100644 index 5433ea1..0000000 --- a/emacs_el/dna-mode.el +++ /dev/null @@ -1,590 +0,0 @@ -;;; dna-mode.el --- a major mode for editing dna sequences -;; -;; ~harley/share/emacs/pkg/dna/dna-mode.el --- -;; -;; $Id: dna-mode.el,v 1.40 2004/04/20 19:03:04 harley Exp $ -;; - -;; Author: Harley Gorrell -;; URL: http://www.mahalito.net/~harley/elisp/dna-mode.el -;; License: GPL v2 -;; Keywords: dna, emacs, editing - -;;; Commentary: -;; * A collection of functions for editing DNA sequences. It -;; provides functions to make editing dna in Emacs easier. -;; -;; Dna-mode will: -;; * Fontify keywords and line numbers in sequences. -;; * Fontify bases when font-lock-mode is disabled. -;; * Incrementally search dna over pads and numbers. -;; * Complement and reverse complement a region. -;; * Move over bases and entire sequences. -;; * Detect sequence files by content. - -;;; Installation: -;; -------------------- -;; Here are two suggested ways for installing this package. -;; You can choose to autoload it when needed, or load it -;; each time emacs is started. Put one of the following -;; sections in your .emacs: -;; -;; ---Autoload: -;; (autoload 'dna-mode "dna-mode" "Major mode for dna" t) -;; (add-to-list -;; 'auto-mode-alist -;; '("\\.\\(fasta\\|fa\\|exp\\|ace\\|gb\\)\\'" . dna-mode)) -;; (add-hook 'dna-mode-hook 'turn-on-font-lock) -;; -;; ---Load: -;; (setq dna-do-setup-on-load t) -;; (load "/pathname/dna-mode") -;; -;; The dna-isearch-forward function (and isearch in general) -;; is much more useful with something like the following: -;; (make-face 'isearch) -;; (set-face-background 'isearch "yellow") -;; (setq-default search-highlight t) - -;;; History: -;; 2003-03-16: Updated URL and contact info -;; 2004-04-20: Added dna-color-bases-region to the keymap for Mike. - -;;; User customizable vars start here - -;;; Code: -(defvar dna-mode-hook nil - "*Hook to setup `dna-mode'.") - -(defvar dna-mode-load-hook nil - "*Hook to run when `dna-mode' is loaded.") - -(defvar dna-setup-on-load nil - "*If not nil setup dna mode on load by running `dna-`add-hook's'.") - -;; Bases -(defvar dna-valid-base-regexp - "[-*:acgtmrwsykvhdbxnACGTMRWSYKVHDBXN]" - "*A regexp which matches a single base.") - -(defvar dna-base-complement-list - '((?- . ?-) (?n . ?n) (?* . ?*) (?x . ?x) (?: . ?:) ; identity - (?a . ?t) (?c . ?g) (?g . ?c) (?t . ?a) ; single - (?m . ?k) (?r . ?y) (?w . ?w) (?s . ?s) (?y . ?r) (?k . ?m) ; double - (?v . ?b) (?h . ?d) (?d . ?h) (?b . ?v) ; triple - ) - "*List of bases and their complements. -Bases should be lowercase, as they are upcased when the `vector is made.") - -;; These are the colors used when coloring bases. -(defvar dna-base-color-a "blue") -(defvar dna-base-color-c "black") -(defvar dna-base-color-g "green") -(defvar dna-base-color-t "red") - -;; Dna-isearch -(defvar dna-cruft-regexp "[* 0-9\t\n]" - "*Regexp to match cruft which may appear between bases. -Skip over it during dna-motion and dna-isearch.") - -(defvar dna-isearch-case-fold-search t - "*Case fold dna-isearches if set.") - -;; Sequence -(defvar dna-sequence-start-regexp - "^\\(>\\|ID\\|LOCUS\\|DNA\\)" - "A regexp which matches the start of a sequence.") - -;;; End of user customizable vars - -;;; Start of internal vars and code - -(defvar dna-base-complement-vector - (let ((c-vec (make-vector 256 nil)) - (c-list dna-base-complement-list)) - (while c-list - (aset c-vec (car (car c-list)) (cdr (car c-list))) - (aset c-vec (upcase (car (car c-list))) (upcase (cdr (car c-list)))) - (setq c-list (cdr c-list))) - c-vec) - "A vector of upper and lower case bases and their complements.") - -;; I also use "Alt" as C-c is too much to type for cursor motions. -(defvar dna-mode-map - (let ((map (make-sparse-keymap))) - ;; Ctrl bindings - (define-key map "\C-c\C-f" 'dna-forward-base) - (define-key map "\C-cf" 'dna-forward-base) - (define-key map "\C-c\C-b" 'dna-backward-base) - (define-key map "\C-cb" 'dna-backward-base) - (define-key map "\C-c\C-s" 'dna-isearch-forward) - (define-key map "\C-cs" 'dna-isearch-forward) - (define-key map "\C-cr" 'dna-reverse-complement-region) - (define-key map "\C-cc" 'dna-complement-region) - (define-key map "\C-c#" 'dna-count-bases-region) - (define-key map "\M-\C-h" 'dna-mark-sequence) - (define-key map "\M-\C-a" 'dna-beginning-of-sequence) - (define-key map "\M-\C-e" 'dna-end-of-sequence) - ;; base coloring - (define-key map "\C-cg" 'dna-color-bases-region) - (define-key map "\C-cl" 'font-lock-mode) - ;; XEmacs does not like the Alt bindings - (when (not (string-match "XEmacs" (emacs-version))) - (define-key map [A-right] 'dna-forward-base) - (define-key map [A-left] 'dna-backward-base) - (define-key map [A-up] 'dna-beginning-of-sequence) - (define-key map [A-down] 'dna-end-of-sequence) - (define-key map [?\A-\C-s] 'dna-isearch-forward)) - map) - "The local keymap for `dna-mode'.") - -;;;###autoload -(defun dna-mode () - "Major mode for editing DNA sequences. - -This mode also customizes isearch to search over line breaks. -Use \\[universal-argument] Number as a prefix to dna-forward-base to move that -many bases. This skips line breaks and spaces. - -dna-color-bases-region disables font-lock-mode automaticly -as they cant work together. \\[dna-color-bases-region] turns font-lock-mode back on. - -\\{dna-mode-map}" - (interactive) - ;; - (kill-all-local-variables) - (setq mode-name "dna") - (setq major-mode 'dna-mode) - (use-local-map dna-mode-map) - ;; - (make-local-variable 'font-lock-defaults) - (setq font-lock-defaults '(dna-font-lock-keywords)) - ;; - (make-local-variable 'dna-valid-base-regexp) - (make-local-variable 'dna-sequence-start-regexp) - (make-local-variable 'dna-cruft-regexp) - (make-local-variable 'dna-isearch-case-fold-search) - ;; - (run-hooks 'dna-mode-hook)) - -;; Keywords -;; Todo: Seperate the keywords into a list for each format, rather -;; than one for all. -(defvar dna-font-lock-keywords - '( - ;; Fasta - ("^\\(>\\)\\([-_.|a-zA-Z0-9]+\\)\\([ \t]+.*\\)?" - (1 font-lock-keyword-face) - (2 font-lock-function-name-face) - (3 font-lock-comment-face nil t)) - - ;; Exp - ("^\\(ID\\) +\\([-_.a-zA-Z_0-9]+\\)" - (1 font-lock-keyword-face) (2 font-lock-function-name-face)) - ("^\\(CC\\|SQ\\)\\([ \t]\\(.*\\)\\)?$" - (1 font-lock-keyword-face) (3 font-lock-comment-face nil t)) - ("^\\(\\sw\\sw\\)[ \t]" - (1 font-lock-keyword-face)) - ("^\\(//\\)" - (1 font-lock-keyword-face)) - - ;; Ace (phrap output) - ("^\\(DNA\\|Sequence\\|BaseQuality\\) +\\([-_.a-zA-Z_0-9]+\\)" - (1 font-lock-keyword-face) (2 font-lock-function-name-face)) - - ;; Genbank - ("^\\(LOCUS\\) +\\([-_.a-zA-Z_0-9]+\\)";; are '-_.' allowed? - (1 font-lock-keyword-face) (2 font-lock-function-name-face)) - "ORIGIN" - - ; More genbank keywords... - "ACCESSION" "AUTHORS" "AUTHORS" "BASE COUNT" "DEFINITION" - "FEATURES" "JOURNAL" "JOURNAL" "KEYWORDS" "MEDLINE" "NID" - "ORGANISM" "REFERENCE" "SEGMENT" "SOURCE" "TITLE" - - ;; line numbers... - ("^[ \t]*\\([0-9]+\\)" - (1 font-lock-string-face)) - - ;; others...? - ) - "Expressions to hilight in `dna-mode'.") - - -;;; Setup functions -(defun dna-find-file-func () - "Invoke `dna-mode' if the buffer look like a sequence. -and another mode is not active. -This function is added to `find-file-hooks'." - (if (and (eq major-mode 'fundamental-mode) - (looking-at dna-sequence-start-regexp)) - (dna-mode))) - -;;;###autoload -(defun dna-add-hooks () - "Add a default set of dna-hooks. -These hooks will activate `dna-mode' when visiting a file -which has a dna-like name (.fasta or .gb) or whose contents -looks like dna. It will also turn enable fontification for `dna-mode'." - (add-hook 'dna-mode-hook 'turn-on-font-lock) - (add-hook 'find-file-hooks 'dna-find-file-func) - (add-to-list - 'auto-mode-alist - '("\\.\\(fasta\\|fa\\|exp\\|ace\\|gb\\)\\'" . dna-mode))) - -;; Setup hooks on request when this mode is loaded. -(if dna-setup-on-load - (dna-add-hooks)) - -(defun dna-next-char-func () - "Should never be called. Overridden in `dna-forward-base'." - (error "This shouldnt have been called")) - -;; Motion -(defun dna-forward-base (count) - "Move forward COUNT bases. Move backward if negative. -Skip over dna-isearch-cruft. Stop on non-base or -non-whitespace characters." - (interactive "p") - (let ((c 0) - (abscount (abs count)) - (dir (if (< count 0) -1 1)) - dna-next-char-func - bstr) - ;; - (fset 'dna-next-char-func (if (< dir 0) 'preceding-char 'following-char)) - ;; - (while (< c abscount) - (setq bstr (char-to-string (dna-next-char-func))) - (cond - ((string-match dna-valid-base-regexp bstr) - (forward-char dir) - (setq c (1+ c))) - ((string-match dna-cruft-regexp bstr) - (forward-char dir)) - (t - (message "Moved %d bases forward." c) - (setq abscount c)))) ; stop the while - - ;; Move over trailing junk when moving forward - (if (= dir 1) - (while (string-match dna-cruft-regexp - (char-to-string (dna-next-char-func))) - (forward-char dir)) - ) - ;; return the distance moved - (* dir abscount))) - -;; aaaaaaaaaa cccccccccc | gggggggggg tttttttttt - -(defun dna-backward-base (count) - "Move backward COUNT bases. See `dna-forward-base'." - (interactive "p") - (dna-forward-base (- count))) - -(defun dna-beginning-of-sequence () - "Move the start of the sequence or the buffer." - (interactive) - (goto-char - (or - (search-backward-regexp dna-sequence-start-regexp (point-min) t) - (point-min)))) - -(defun dna-end-of-sequence () - "Move to the end of the sequence or the buffer." - (interactive) - (end-of-line) - (skip-syntax-forward "-") - (let ((seqstart - (search-forward-regexp dna-sequence-start-regexp (point-max) t))) - (if seqstart (progn - (goto-char seqstart) - (beginning-of-line)) - (goto-char (point-max))))) - -(defun dna-mark-sequence () - "Put point at the beginning of a sequence, mark at end." - (interactive) - (dna-end-of-sequence) - (set-mark (point)) - (dna-beginning-of-sequence)) - -(defun dna-count-bases-region (d-start d-end) - "Count the number of bases in the region D-START to D-END. -Echos the number of bases counted. -If an invalid base is found, stops on the base and signals an error." - (interactive "r") - (let ((basecount 0)) - (goto-char d-start) - (while (< (point) d-end) - (cond - ((looking-at dna-valid-base-regexp) - (setq basecount (1+ basecount)) - (forward-char 1)) - ((looking-at dna-cruft-regexp) - (forward-char 1)) - (t - (error "Bad base found. '%s'" - (buffer-substring (point) (1+ (point))))) - )) - (message "There are %d bases in the region." basecount) - basecount)) - -;;; reverse and complement -(defun dna-complement-base-list (base) - "Complement the BASE using a list based method. -Returns the complement of the base. -It can also be used to test if the character is a base, -as all bases should have a complement." - (cdr (assq base dna-base-complement-list))) - -(defun dna-complement-base (base) - "Complement a BASE using a vector based method. -See `dna-complement-base-list' for more info." - (aref dna-base-complement-vector base)) - -(defun dna-complement (base) - "Look up the complement of the BASE and print a message. -Handy for us CS types." - (interactive "cComplement of base:") - (message "Complement of '%c' is '%c'." base (dna-complement-base base))) - -(defun dna-complement-region (r-start r-end) - "Complement a region of bases from R-START to R-END. -Complement a region of the buffer by deleting it and -inserting the complements, base by base. Non-bases are -passed over unchanged." - (interactive "r") - (let (r-string r-length r-point r-base r-cbase) - (goto-char r-start) - (setq r-string (buffer-substring-no-properties r-start r-end)) - (setq r-length (length r-string)) - (delete-region r-start r-end) - (setq r-point 0) - (while (< r-point r-length) - (setq r-base (aref r-string r-point)) - (setq r-cbase (dna-complement-base r-base)) - (insert (if r-cbase r-cbase r-base)) - (setq r-point (1+ r-point))))) - -;;;###autoload -(defun dna-reverse-complement-region (r-start r-end) - "Reverse complement a region of dna from R-START to R-END. -Works by deleting the region and inserting bases reversed -and complemented, while entering non-bases in the order -found." - (interactive "r") - (let (r-string r-length r-base r-cbase r-point r-mark) - (goto-char r-start) - (setq r-string (buffer-substring-no-properties r-start r-end)) - (setq r-length (length r-string)) - (setq r-mark (1- r-length)) - (setq r-point 0) - - ;; goodbye - (delete-region r-start r-end) - - ;; insert the bases from back to front base by base - ;; insert non-bases from front to back to preserve spacing - (while (< r-point r-length) - (setq r-base (aref r-string r-point)) - (setq r-cbase (dna-complement-base r-base)) - (if r-cbase - (progn - ;; it is a base. find the reverse and complement it - (while (not (dna-complement-base (aref r-string r-mark))) - (setq r-mark (1- r-mark))) - (insert (dna-complement-base (aref r-string r-mark))) - (setq r-mark (1- r-mark)) ) - ;; not a base, no change - (insert r-base)) - (setq r-point (1+ r-point))))) - -;; format -(defun dna-guess-format-func () - "Guess the format of the sequence the point is at or after. -Returns the format or nil." - (save-excursion - (end-of-line) - (dna-beginning-of-sequence) - (cond - ((looking-at "^>") 'fasta) - ((looking-at "^DNA") 'phrap) - ((looking-at "^ID") 'exp) - (t nil)))) - -(defun dna-guess-format () - "Guess and print the format of the sequence." - (interactive) - (message "%s" (dna-guess-format-func))) - -;;; dna-isearch stuff -(defun dna-isearch-mangle-str (str) - "Mangle the string STR into a regexp to search over cruft in sequence. -Inserts a regexp between each base which matches sequence formatting cruft. -For example, if `dna-cruft-regexp' is '[ ]', -the search string 'acgt' would transformed into 'a[ ]*c[ ]*g[ ]*t[ ]*'" - (let ((i 0) (out "")) - (while (< i (length str)) - (setq out (concat out (substring str i (1+ i)) dna-cruft-regexp "*")) - (setq i (1+ i))) - out)) - -(defadvice isearch-message-prefix (around dna-isearch-ismp) - "Set the isearch prompt string to show dna search is active. -This serves as a warning that the string is being mangled." - ad-do-it - (setq ad-return-value (concat "DNA " ad-return-value))) - -(defadvice isearch-search (around dna-isearch-iss) - "The advice used to mangle the search string in isearch." - (let ((isearch-regexp t) - ;; force case folding - (isearch-case-fold-search dna-isearch-case-fold-search) - (isearch-string (dna-isearch-mangle-str isearch-string)) ) - ad-do-it)) - -;;;###autoload -(defun dna-isearch-forward () - "Isearch forward on dna sequence. -Enable the `dna-mode' search string mangling advice and start the search." - (interactive) - ;; Enable the prompt - (ad-enable-advice 'isearch-message-prefix 'around 'dna-isearch-ismp) - (ad-activate 'isearch-message-prefix) - ;; Enable the mangling - (ad-enable-advice 'isearch-search 'around 'dna-isearch-iss) - (ad-activate 'isearch-search) - - ;; run the search - (isearch-forward) - - ;; - (ad-disable-advice 'isearch-message-prefix 'around 'dna-isearch-ismp) - (ad-activate 'isearch-message-prefix) - ;; - (ad-disable-advice 'isearch-search 'around 'dna-isearch-iss) - (ad-activate 'isearch-search)) - -;;; Work with columns of sequences. - -(defun dna-column-select-func () - "Return the start and end of the column as a cons. -Point is moved forward one." - (let (s m e) - (setq m (point)) - ;; work our way up - (while (looking-at dna-valid-base-regexp) - (setq s (point)) - (previous-line 1)) - (goto-char m) - ;; work our way down - (while (looking-at dna-valid-base-regexp) - (setq e (point)) - (next-line 1)) - (goto-char m) - ;; return the start and end of the column - (cons s (1+ e)))) - -(defun dna-column-select () - "Select the current column of text. -Sets the mark at the top and the point at the bottom of a non-blank column." - (interactive) - (let ((se (dna-column-select-func))) - (goto-char (car se)) - (push-mark) - (goto-char (cdr se)))) - -(defvar dna-column-pad "*" - "Character to use when inserting a column of pads.") - -(defun dna-column-insert-pad () - "Insert a column of pads." - (interactive) - (save-excursion - (let ((se (dna-column-select-func))) - (string-rectangle (car se) (cdr se) dna-column-pad)))) - -(defun dna-column-delete () - "Delete the current column of dna." - (interactive) - (save-excursion - (let ((se (dna-column-select-func))) - (kill-rectangle (car se) (cdr se))))) - -;;; Per base colors - -(defun dna-base-color-make-faces (&optional force) - "Build a face to display bases with. FORCE remakes the faces." - (when (or (not (facep 'dna-face-t)) force) - (let ((base-list '("a" "c" "g" "t")) - base base-face) - (while base-list - (setq base (car base-list)) - (setq base-face (intern (concat "dna-base-face-" base))) - (make-face base-face) - (set-face-foreground - base-face (symbol-value (intern (concat "dna-base-color-" base)))) - (setq base-list (cdr base-list)))))) - -;; Make faces on load -(dna-base-color-make-faces t) - -(defvar dna-color-bases-auto t - "Automaticly deactivate `font-lock-mode' when `dna-color-bases' is run. -See dna-color-bases for details.") -;; (setq dna-color-bases-auto t) - -(defun dna-color-bases-region (s e) - "Color the bases in the region S to E. -NOTE: The function `font-lock-mode' will undo the work of this -function if activated. Disable it before using this -function. If `dna-color-bases-auto' is set then `font-lock-mode' -is deactivated automatically." - (interactive "r") - (if (and dna-color-bases-auto font-lock-mode) - (font-lock-mode -1)) - (if font-lock-mode - (error "Font-lock-mode is on -- deactivate it")) - (save-excursion - (let (c) - (goto-char s) - (while (< s e) - (setq c (downcase (char-after s))) - (cond - ((eq c ?a) - (set-text-properties s (+ s 1) '(face dna-base-face-a))) - ((eq c ?c) (+ s 1) - (set-text-properties s (+ s 1) '(face dna-base-face-c))) - ((eq c ?g) (+ s 1) - (set-text-properties s (+ s 1) '(face dna-base-face-g))) - ((eq c ?t) (+ s 1) - (set-text-properties s (+ s 1) '(face dna-base-face-t))) - (t nil)) - (setq s (+ s 1)))))) - -(defun dna-uncolor-bases-region (s e) - "Uncolor the bases from S to E." - (interactive "r") - (remove-text-properties s e '(face nil))) - -;;; Functions for me. - -;; I like to datestamp sequences I work with. -(defvar dna-timestamp-format "%Y%m%d" - "Format of the time stamp which `dna-timestamp-seq' uses.") - -(defun dna-timestamp-seq () - "Insert the current date into the sequence. -Assumes fasta format." - (interactive) - (end-of-line) - (dna-beginning-of-sequence) - (end-of-line) - (insert " " (format-time-string dna-timestamp-format (current-time)))) - -;; done loading -(run-hooks 'dna-mode-load-hook) -(provide 'dna-mode) - -;;; dna-mode.el ends here