]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
added screen.seqs command - pds
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11
12 /*******************************************************************************/
13 void FastaMap::readFastaFile(ifstream& in) {
14         try {
15                 string name, sequence, line;
16                 sequence = "";
17         
18                 in >> line;
19                 name = line.substr(1, line.length());  //rips off '>'
20         
21                 //read through file
22                 while (!in.eof()) {
23                         in >> line;
24
25                         if (line[0] != '>') {  //if it's a sequence line
26                                 sequence += line;
27                         }
28                         else{
29                         //input sequence info into map
30                                 seqmap[name] = sequence;  
31
32                                 it = data.find(sequence);
33                                 if (it == data.end()) {         //it's unique.
34                                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
35                                         data[sequence].groupnumber = 1;
36                                         data[sequence].names = name;
37                                 }else { // its a duplicate.
38                                         data[sequence].names += "," + name;
39                                         data[sequence].groupnumber++;
40                                 }
41                                 name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
42                                 sequence = "";
43                         }
44                         
45                         gobble(in);
46                 }
47                 it = data.find(sequence);
48                 if (it == data.end()) {         //it's unique.
49                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
50                         data[sequence].groupnumber = 1;
51                         data[sequence].names = name;
52                 }else { // its a duplicate.
53                         data[sequence].names += "," + name;
54                         data[sequence].groupnumber++;
55                 }
56                 
57                         
58         }
59         catch(exception& e) {
60                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
61                 exit(1);
62         }
63         catch(...) {
64                 cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67 }
68 /*******************************************************************************/
69 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
70         return data[seq].groupname;
71 }
72 /*******************************************************************************/
73 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
74         return data[seq].names;
75 }
76 /*******************************************************************************/
77 int FastaMap::getGroupNumber(string seq) {      //pass a sequence get the number of identical sequences.
78         return data[seq].groupnumber;
79 }
80 /*******************************************************************************/
81 string FastaMap::getSequence(string name) {
82         it2 = seqmap.find(name);
83         if (it2 == seqmap.end()) {      //it's not found
84                 return "not found";
85         }else { // found it
86                 return it2->second;
87         }
88 }       
89 /*******************************************************************************/
90 void FastaMap::push_back(string name, string seq) {
91         it = data.find(seq);
92         if (it == data.end()) {         //it's unique.
93                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
94                 data[seq].groupnumber = 1;
95                 data[seq].names = name;
96         }else { // its a duplicate.
97                 data[seq].names += "," + name;
98                 data[seq].groupnumber++;
99         }
100         
101         seqmap[name] = seq;
102 }
103 /*******************************************************************************/
104 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
105         return data.size();
106 }
107 /*******************************************************************************/
108 void FastaMap::printNamesFile(ostream& out){ //prints data
109         try {
110                 // two column file created with groupname and them list of identical sequence names
111                 for (it = data.begin(); it != data.end(); it++) {
112                         out << it->second.groupname << '\t' << it->second.names << endl;
113                 }
114         }
115         catch(exception& e) {
116                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
117                 exit(1);
118         }
119         catch(...) {
120                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
121                 exit(1);
122         }
123 }
124 /*******************************************************************************/
125 void FastaMap::printCondensedFasta(ostream& out){ //prints data
126         try {
127                 // two column file created with groupname and them list of identical sequence names
128                 for (it = data.begin(); it != data.end(); it++) {
129                         out << ">" << it->second.groupname << endl;
130                         out << it->first << endl;
131                 }
132         }
133         catch(exception& e) {
134                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
135                 exit(1);
136         }
137         catch(...) {
138                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
139                 exit(1);
140         }
141 }
142 /*******************************************************************************/
143