1 /* Martin Asser Hansen (mail@maasha.dk) Copyright (C) 2008 - All right reserved */
/* Size limits used when allocating memory for sequence entries.            */
/* NOTE(review): presumably these are the default arguments for seq_new()   */
/* (name buffer size and sequence buffer size) - confirm against callers.   */
#define MAX_SEQ_NAME    1024
#define MAX_SEQ         250000000
/* Macro to test if a given char is sequence (DNA, RNA, protein, indels, brackets, etc.). */
/* Evaluates to 1 when x lies in the range 33..126 (i.e. x > 32 && x < 127), else 0.      */
/* The argument and the whole expansion are parenthesized so the macro can be combined    */
/* with other operators (e.g. isseq( c ) && other) without changing its meaning.          */
/* NB: x is evaluated twice - do not pass expressions with side effects (e.g. *p++).      */
#define isseq( x ) ( ( ( x ) > 32 && ( x ) < 127 ) ? 1 : 0 )
/* Macro to test if a given char is DNA.                                              */
/* Evaluates to 1 when c is one of A, T, C, G or N (upper or lower case), else 0.     */
/* The argument and the whole expansion are parenthesized so the macro can be         */
/* combined with other operators (e.g. isDNA( c ) && other) without being mis-parsed. */
/* NB: c is evaluated several times - do not pass expressions with side effects.      */
#define isDNA( c ) ( ( ( c ) == 'A' || ( c ) == 'a' || \
                       ( c ) == 'T' || ( c ) == 't' || \
                       ( c ) == 'C' || ( c ) == 'c' || \
                       ( c ) == 'G' || ( c ) == 'g' || \
                       ( c ) == 'N' || ( c ) == 'n' ) ? 1 : 0 )
/* Macro to test if a given char is RNA.                                              */
/* Evaluates to 1 when c is one of A, U, C, G or N (upper or lower case), else 0.     */
/* The argument and the whole expansion are parenthesized so the macro can be         */
/* combined with other operators (e.g. isRNA( c ) && other) without being mis-parsed. */
/* NB: c is evaluated several times - do not pass expressions with side effects.      */
#define isRNA( c ) ( ( ( c ) == 'A' || ( c ) == 'a' || \
                       ( c ) == 'U' || ( c ) == 'u' || \
                       ( c ) == 'C' || ( c ) == 'c' || \
                       ( c ) == 'G' || ( c ) == 'g' || \
                       ( c ) == 'N' || ( c ) == 'n' ) ? 1 : 0 )
/* Macros for converting DNA ASCII to binary.                                      */
/* Each macro ORs a 2-bit nucleotide code into c (A=00, C=01, G=10, T=11).         */
/* Because the bits are OR'ed in, the rightmost two bits of c must already be zero */
/* for the result to equal the code; c must be a modifiable lvalue.                */

/* A = 00: nothing to OR in, so the macro expands to nothing. */
#define add_A( c )

/* T = 11: set both of the rightmost two bits of c. */
#define add_T( c ) ( ( c ) |= 0x3 )

/* C = 01: set the rightmost bit of c. */
#define add_C( c ) ( ( c ) |= 0x1 )

/* G = 10: set the second bit from the right of c. */
#define add_G( c ) ( ( c ) |= 0x2 )
/* Sequence entry type.                                                           */
/* seq_entry is an alias for struct _seq_entry; the members of that struct are    */
/* not shown in this part of the header.                                          */
/* NOTE(review): the tag _seq_entry starts with an underscore, which C reserves   */
/* at file scope - kept as is because other files may refer to the tag.           */
typedef struct _seq_entry seq_entry;
/* Byte array for fast conversion of binary blocks to DNA. */
/* Each binary block holds four nucleotides, each encoded in 2 bits: */
/* A=00 T=11 C=01 G=10 */
38 /* Initialize a new sequence entry. */
39 seq_entry *seq_new( size_t max_seq_name, size_t max_seq );
41 /* Destroy a sequence entry. */
42 void seq_destroy( seq_entry *entry );
/* Convert a sequence to uppercase.                                               */
/* NOTE(review): takes a non-const char * and returns nothing, so presumably the  */
/* string is modified in place - confirm against the implementation.              */
void seq_uppercase(char *seq);

/* Convert a sequence to lowercase (same calling convention as seq_uppercase). */
void lowercase_seq(char *seq);
/* Reverse-complement a DNA sequence. */
void revcomp_dna(char *seq);

/* Reverse-complement an RNA sequence. */
void revcomp_rna(char *seq);

/* Reverse-complement a nucleotide sequence after guessing the sequence type. */
void revcomp_nuc(char *seq);
/* Complement a DNA sequence (NB: the sequence is not reversed). */
void complement_dna(char *seq);

/* Complement an RNA sequence (NB: the sequence is not reversed). */
void complement_rna(char *seq);

/* Complement a nucleotide sequence after guessing the sequence type. */
void complement_nuc(char *seq);
/* Reverse a sequence (no complementing is implied by this declaration). */
void reverse(char *seq);
/* Replace every non-nucleotide letter in the sequence with N.                    */
/* NOTE(review): which letters count as nucleotides, and how case is handled, is  */
/* decided by the implementation - not visible here.                              */
void seq2nuc_simple(char *seq);
/* Convert DNA to RNA by changing t to u and T to U. */
void dna2rna(char *seq);

/* Convert RNA to DNA by changing u to t and U to T. */
void rna2dna(char *seq);
/* Sequence type detection. Each check inspects the first 100 residues of seq. */

/* Return true if the sequence looks like DNA. */
bool is_dna(char *seq);

/* Return true if the sequence looks like RNA. */
bool is_rna(char *seq);

/* Return true if the sequence looks like protein. */
bool is_protein(char *seq);

/* Guess whether a sequence is DNA, RNA, or protein.                              */
/* NOTE(review): the returned char * presumably names the guessed type; the exact */
/* strings and who owns the returned memory are not visible here - confirm.       */
char *seq_guess_type(char *seq);
/* Return true if the sequence contains N or n. */
bool contain_N(char *seq);
/* Pack a nucleotide oligo (max length 15) into a binary/integer value, which is  */
/* good for use as a hash key.                                                    */
/* NOTE(review): presumably uses the 2-bit nucleotide codes (A=00 C=01 G=10 T=11) */
/* so that 15 residues fit in an int - confirm against the implementation.        */
int oligo2bin(char *oligo);