void uppercase_seq( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Uppercases a NUL-terminated sequence in place. */
    /* Characters without an uppercase form are left as they are. */

    char *p;

    for ( p = seq; *p; p++ ) {
        /* toupper() is only defined for EOF and unsigned char values, */
        /* so convert first to stay safe where plain char is signed.   */
        *p = ( char ) toupper( ( unsigned char ) *p );
    }
}
void lowercase_seq( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Lowercases a NUL-terminated sequence in place. */
    /* Characters without a lowercase form are left as they are. */

    char *p;

    for ( p = seq; *p; p++ ) {
        /* tolower() is only defined for EOF and unsigned char values, */
        /* so convert first to stay safe where plain char is signed.   */
        *p = ( char ) tolower( ( unsigned char ) *p );
    }
}
void revcomp_dna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Reverse complements a DNA sequence in place: */
    /* every residue is complemented, then the string is reversed. */

    complement_dna( seq );
    reverse( seq );
}
void revcomp_rna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Reverse complements an RNA sequence in place: */
    /* every residue is complemented, then the string is reversed. */

    complement_rna( seq );
    reverse( seq );
}
void revcomp_nuc( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Reverse complements a nucleotide sequence in place. */
    /* complement_nuc() picks the DNA or RNA rules (and dies if the */
    /* sequence is neither); the result is then reversed. */

    complement_nuc( seq );
    reverse( seq );
}
void complement_nuc( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Complements a nucleotide sequence in place */
    /* after guessing its type. */
    /* DNA is tested first, so a sequence accepted by both is_dna() */
    /* and is_rna() is complemented with the DNA rules. */

    if ( is_dna( seq ) ) {
        complement_dna( seq );
    } else if ( is_rna( seq ) ) {
        complement_rna( seq );
    } else {
        /* Neither predicate accepted the sequence. */
        die( "Complement nuc failed.\n" );
    }
}
void complement_dna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Complements a DNA sequence in place, including */
    /* ambiguity coded nucleotides. */
    /* Letter case is preserved. U/u are accepted and complemented */
    /* to A/a. Any character not listed below is left unchanged. */

    char *p;

    for ( p = seq; *p; p++ )
    {
        switch ( *p )
        {
            case 'a': *p = 't'; break;
            case 'A': *p = 'T'; break;
            case 'c': *p = 'g'; break;
            case 'C': *p = 'G'; break;
            case 'g': *p = 'c'; break;
            case 'G': *p = 'C'; break;
            case 't': *p = 'a'; break;
            case 'u': *p = 'a'; break;
            case 'T': *p = 'A'; break;
            case 'U': *p = 'A'; break;
            case 'm': *p = 'k'; break;
            case 'M': *p = 'K'; break;
            case 'r': *p = 'y'; break;
            case 'R': *p = 'Y'; break;
            case 'y': *p = 'r'; break;
            case 'Y': *p = 'R'; break;
            case 'k': *p = 'm'; break;
            case 'K': *p = 'M'; break;
            case 'b': *p = 'v'; break;
            case 'B': *p = 'V'; break;
            case 'd': *p = 'h'; break;
            case 'D': *p = 'H'; break;
            case 'h': *p = 'd'; break;
            case 'H': *p = 'D'; break;
            case 'v': *p = 'b'; break;
            case 'V': *p = 'B'; break;

            /* W, S and N are their own complement; they are listed so */
            /* the table is complete, and must keep their case (a      */
            /* lowercase 's' used to come back as uppercase 'S').      */
            case 'w': case 'W':
            case 's': case 'S':
            case 'n': case 'N':
                break;

            default:
                break;
        }
    }
}
void complement_rna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Complements an RNA sequence in place, including */
    /* ambiguity coded nucleotides. */
    /* Letter case is preserved. T/t are accepted and complemented */
    /* to A/a. Any character not listed below is left unchanged. */

    char *p;

    for ( p = seq; *p; p++ )
    {
        switch ( *p )
        {
            case 'a': *p = 'u'; break;
            case 'A': *p = 'U'; break;
            case 'c': *p = 'g'; break;
            case 'C': *p = 'G'; break;
            case 'g': *p = 'c'; break;
            case 'G': *p = 'C'; break;
            case 't': *p = 'a'; break;
            case 'u': *p = 'a'; break;
            case 'T': *p = 'A'; break;
            case 'U': *p = 'A'; break;
            case 'm': *p = 'k'; break;
            case 'M': *p = 'K'; break;
            case 'r': *p = 'y'; break;
            case 'R': *p = 'Y'; break;
            case 'y': *p = 'r'; break;
            case 'Y': *p = 'R'; break;
            case 'k': *p = 'm'; break;
            case 'K': *p = 'M'; break;
            case 'b': *p = 'v'; break;
            case 'B': *p = 'V'; break;
            case 'd': *p = 'h'; break;
            case 'D': *p = 'H'; break;
            case 'h': *p = 'd'; break;
            case 'H': *p = 'D'; break;
            case 'v': *p = 'b'; break;
            case 'V': *p = 'B'; break;

            /* W, S and N are their own complement; they are listed so */
            /* the table is complete, and must keep their case (a      */
            /* lowercase 's' used to come back as uppercase 'S').      */
            case 'w': case 'W':
            case 's': case 'S':
            case 'n': case 'N':
                break;

            default:
                break;
        }
    }
}
void reverse( char *string )
{
    /* Martin A. Hansen, May 2008 */

    /* Reverses a NUL-terminated string in place. */
    /* Empty and single-character strings are returned untouched. */

    char   tmp;
    size_t left;
    size_t right;

    right = strlen( string );

    /* Guard before computing the last index: strlen() - 1 would */
    /* wrap around for an empty string since size_t is unsigned. */
    if ( right < 2 ) {
        return;
    }

    /* Swap characters pairwise, walking in from both ends. */
    for ( left = 0, right--; left < right; left++, right-- )
    {
        tmp             = string[ left ];
        string[ left ]  = string[ right ];
        string[ right ] = tmp;
    }
}
void seq2nuc_simple( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Uppercases all DNA letters, while transforming */
    /* all non-DNA letters in sequence to Ns. */
    /* Works in place on a NUL-terminated string. U/u count as */
    /* nucleotide letters here and come out as U. */

    char *p;

    for ( p = seq; *p; p++ )
    {
        switch ( *p )
        {
            /* Already uppercase nucleotide letters are kept; per the */
            /* contract above only non-nucleotide letters become N.  */
            case 'A': case 'T': case 'C': case 'G': case 'U':
                break;

            case 'a': *p = 'A'; break;
            case 't': *p = 'T'; break;
            case 'c': *p = 'C'; break;
            case 'g': *p = 'G'; break;
            case 'u': *p = 'U'; break;

            /* Everything else, ambiguity codes and gaps included. */
            default:  *p = 'N'; break;
        }
    }
}
void dna2rna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Converts a DNA sequence to RNA in place by changing */
    /* T and t to U and u. All other characters are kept. */

    char *p;

    for ( p = seq; *p; p++ )
    {
        switch ( *p )
        {
            case 't': *p = 'u'; break;
            case 'T': *p = 'U'; break;
            default:            break;
        }
    }
}
void rna2dna( char *seq )
{
    /* Martin A. Hansen, May 2008 */

    /* Converts an RNA sequence to DNA in place by changing */
    /* U and u to T and t. All other characters are kept. */

    char *p;

    for ( p = seq; *p; p++ )
    {
        switch ( *p )
        {
            case 'u': *p = 't'; break;
            case 'U': *p = 'T'; break;
            default:            break;
        }
    }
}
281 bool is_dna( char *seq )
283 /* Martin A. Hansen, May 2008 */
285 /* Determines if a given sequence is DNA, */
286 /* from inspection of the first 100 residues. */
290 for ( i = 0; seq[ i ]; i++ )
294 case 'A': case 'a': break;
295 case 'G': case 'g': break;
296 case 'C': case 'c': break;
297 case 'T': case 't': break;
298 case 'R': case 'r': break;
299 case 'Y': case 'y': break;
300 case 'W': case 'w': break;
301 case 'S': case 's': break;
302 case 'M': case 'm': break;
303 case 'K': case 'k': break;
304 case 'H': case 'h': break;
305 case 'D': case 'd': break;
306 case 'V': case 'v': break;
307 case 'B': case 'b': break;
308 case 'N': case 'n': break;
313 default: return FALSE;
325 bool is_rna( char *seq )
327 /* Martin A. Hansen, May 2008 */
329 /* Determines if a given sequence is RNA, */
330 /* from inspection of the first 100 residues. */
334 for ( i = 0; seq[ i ]; i++ )
338 case 'A': case 'a': break;
339 case 'G': case 'g': break;
340 case 'C': case 'c': break;
341 case 'U': case 'u': break;
342 case 'R': case 'r': break;
343 case 'Y': case 'y': break;
344 case 'W': case 'w': break;
345 case 'S': case 's': break;
346 case 'M': case 'm': break;
347 case 'K': case 'k': break;
348 case 'H': case 'h': break;
349 case 'D': case 'd': break;
350 case 'V': case 'v': break;
351 case 'B': case 'b': break;
352 case 'N': case 'n': break;
357 default: return FALSE;
369 bool is_protein( char *seq )
371 /* Martin A. Hansen, May 2008 */
373 /* Determines if a given sequence is protein, */
374 /* from inspection of the first 100 residues. */
378 for ( i = 0; seq[ i ]; i++ )
382 case 'K': case 'k': break;
383 case 'R': case 'r': break;
384 case 'H': case 'h': break;
385 case 'D': case 'd': break;
386 case 'E': case 'e': break;
387 case 'S': case 's': break;
388 case 'T': case 't': break;
389 case 'N': case 'n': break;
390 case 'Q': case 'q': break;
391 case 'A': case 'a': break;
392 case 'V': case 'v': break;
393 case 'I': case 'i': break;
394 case 'L': case 'l': break;
395 case 'M': case 'm': break;
396 case 'F': case 'f': break;
397 case 'Y': case 'y': break;
398 case 'W': case 'w': break;
399 case 'C': case 'c': break;
400 case 'G': case 'g': break;
401 case 'P': case 'p': break;
402 case 'Z': case 'z': break;
403 case 'B': case 'b': break;
404 case 'X': case 'x': break;
410 default: return FALSE;
422 char *seq_guess_type( char *seq )
424 /* Martin A. Hansen, May 2008 */
426 /* Guess the type of a given sequence, */
427 /* which is returned as a pointer to a string. */
/* The predicates are tried in a fixed order: is_dna(), then is_rna(), */
/* then is_protein(); the first one that accepts seq decides the type. */
433 if ( is_dna( seq ) ) {
435 } else if ( is_rna( seq ) ) {
437 } else if ( is_protein( seq ) ) {
/* No predicate accepted the sequence: the failure is reported via die(). */
440 die( "Could not guess sequence type.\n" );
447 bool contain_N( char *seq )
449 /* Martin A. Hansen, May 2008 */
451 /* Check if a sequence contain N or n residues. */
455 for ( i = 0; seq[ i ]; i++ )
459 case 'N': case 'n': return TRUE;
468 int oligo2bin( char *oligo )
470 /* Martin A. Hansen, August 2004 */
472 /* Pack a max 15 nucleotide long oligo into a four byte integer. */
477 if ( strlen( oligo ) > 15 ) {
478 die( "Oligo will not fit in an integer." );
483 for ( i = 0; oligo[ i ]; i++ )
487 switch ( oligo[ i ] )
489 case 'A': case 'a': bin |= 0; break;
490 case 'N': case 'n': bin |= 0; break;
491 case 'T': case 't': bin |= 1; break;
492 case 'U': case 'u': bin |= 1; break;
493 case 'C': case 'c': bin |= 2; break;
494 case 'G': case 'g': bin |= 3; break;
495 default: die( "Unrecognized nucleotide." );