]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
fixed some bugs
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11
12 /*******************************************************************************/
13 void FastaMap::readFastaFile(ifstream& in) {
14         try {
15                 string name, sequence, line;
16                 sequence = "";
17                 int c;
18                 string temp;
19                 
20                 
21                 //read through file
22                 while ((c = in.get()) != EOF) {
23                         name = ""; sequence = ""; 
24                         //is this a name
25                         if (c == '>') { 
26                                 name = readName(in); 
27                                 sequence = readSequence(in); 
28                         }else {  cout << "Error fasta in your file. Please correct." << endl; }
29
30                         //store info in map
31                         //input sequence info into map
32                         seqmap[name] = sequence;  
33                         it = data.find(sequence);
34                         if (it == data.end()) {         //it's unique.
35                                 data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
36                                 data[sequence].groupnumber = 1;
37                                 data[sequence].names = name;
38                         }else { // its a duplicate.
39                                 data[sequence].names += "," + name;
40                                 data[sequence].groupnumber++;
41                         }       
42                         
43                         gobble(in);
44                 }
45                                         
46         }
47         catch(exception& e) {
48                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51         catch(...) {
52                 cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53                 exit(1);
54         }
55 }
56 /*******************************************************************************/
57 string FastaMap::readName(ifstream& in) {
58         try{
59                 string name = "";
60                 int c;
61                 string temp;
62                 
63                 while ((c = in.get()) != EOF) {
64                         //if c is not a line return
65                         if (c != 10) {
66                                 name += c;
67                         }else { break;  }
68                 }
69                         
70                 return name;
71         }
72         catch(exception& e) {
73                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
74                 exit(1);
75         }
76         catch(...) {
77                 cout << "An unknown error has occurred in the FastaMap class function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
78                 exit(1);
79         }
80 }
81
82 /*******************************************************************************/
83 string FastaMap::readSequence(ifstream& in) {
84         try{
85                 string sequence = "";
86                 string line;
87                 int pos, c;
88                 
89                 while (!in.eof()) {
90                         //save position in file in case next line is a new name.
91                         pos = in.tellg();
92                         line = "";
93                         in >> line;                     
94                         //if you are at a new name
95                         if (line[0] == '>') {
96                                 //put file pointer back since you are now at a new name
97                                 in.seekg(pos, ios::beg);
98                                 c = in.get();  //because you put it back to a newline char
99                                 break;
100                         }else {  sequence += line;      }
101                 }
102                         
103                 return sequence;
104         }
105         catch(exception& e) {
106                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
107                 exit(1);
108         }
109         catch(...) {
110                 cout << "An unknown error has occurred in the FastaMap class function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
111                 exit(1);
112         }
113 }
114 /*******************************************************************************/
115 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
116         return data[seq].groupname;
117 }
118 /*******************************************************************************/
119 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
120         return data[seq].names;
121 }
122 /*******************************************************************************/
123 int FastaMap::getGroupNumber(string seq) {      //pass a sequence get the number of identical sequences.
124         return data[seq].groupnumber;
125 }
126 /*******************************************************************************/
127 string FastaMap::getSequence(string name) {
128         it2 = seqmap.find(name);
129         if (it2 == seqmap.end()) {      //it's not found
130                 return "not found";
131         }else { // found it
132                 return it2->second;
133         }
134 }       
135 /*******************************************************************************/
136 void FastaMap::push_back(string name, string seq) {
137         it = data.find(seq);
138         if (it == data.end()) {         //it's unique.
139                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
140                 data[seq].groupnumber = 1;
141                 data[seq].names = name;
142         }else { // its a duplicate.
143                 data[seq].names += "," + name;
144                 data[seq].groupnumber++;
145         }
146         
147         seqmap[name] = seq;
148 }
149 /*******************************************************************************/
150 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
151         return data.size();
152 }
153 /*******************************************************************************/
154 void FastaMap::printNamesFile(ostream& out){ //prints data
155         try {
156                 // two column file created with groupname and them list of identical sequence names
157                 for (it = data.begin(); it != data.end(); it++) {
158                         out << it->second.groupname << '\t' << it->second.names << endl;
159                 }
160         }
161         catch(exception& e) {
162                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
163                 exit(1);
164         }
165         catch(...) {
166                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167                 exit(1);
168         }
169 }
170 /*******************************************************************************/
171 void FastaMap::printCondensedFasta(ostream& out){ //prints data
172         try {
173                 //creates a fasta file
174                 for (it = data.begin(); it != data.end(); it++) {
175                         out << ">" << it->second.groupname << endl;
176                         out << it->first << endl;
177                 }
178         }
179         catch(exception& e) {
180                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
181                 exit(1);
182         }
183         catch(...) {
184                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
185                 exit(1);
186         }
187 }
188 /*******************************************************************************/
189