]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
This is v.1.4.0
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11 #include "sequence.hpp"
12
13 /*******************************************************************************/
14
15 void FastaMap::readFastaFile(string inFileName) {
16         try {
17                 ifstream in;
18                 openInputFile(inFileName, in);
19                 string name, sequence, line;
20                 sequence = "";
21                 string temp;
22
23                 while(!in.eof()){
24                         Sequence currSeq(in);
25                         name = currSeq.getName();
26                         
27                         if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
28                         else                                            {       sequence = currSeq.getUnaligned();      }
29                         
30                         seqmap[name] = sequence;  
31                         map<string,group>::iterator it = data.find(sequence);
32                         if (it == data.end()) {         //it's unique.
33                                 data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
34 //                              data[sequence].groupnumber = 1;
35                                 data[sequence].names = name;
36                         }else { // its a duplicate.
37                                 data[sequence].names += "," + name;
38 //                              data[sequence].groupnumber++;
39                         }       
40                         
41                         gobble(in);
42                 }
43                 in.close();             
44         }
45         catch(exception& e) {
46                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
47                 exit(1);
48         }
49         catch(...) {
50                 cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
51                 exit(1);
52         }
53 }
54
55 /*******************************************************************************/
56
57 void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data
58         
59         ifstream oldNameFile;
60         openInputFile(oldNameFileName, oldNameFile);
61         
62         map<string,string> oldNameMap;
63         string name, list;
64         while(!oldNameFile.eof()){
65                 oldNameFile >> name >> list;
66                 oldNameMap[name] = list;
67                 gobble(oldNameFile);
68         }
69         oldNameFile.close();
70         
71         ifstream inFASTA;
72         openInputFile(inFastaFile, inFASTA);
73         string sequence;
74         while(!inFASTA.eof()){
75                 Sequence currSeq(inFASTA);
76                 name = currSeq.getName();
77                 
78                 if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
79                 else                                            {       sequence = currSeq.getUnaligned();      }
80                 
81                 seqmap[name] = sequence;  
82                 map<string,group>::iterator it = data.find(sequence);
83                 if (it == data.end()) {         //it's unique.
84                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
85 //                      data[sequence].groupnumber = 1;
86                         data[sequence].names = oldNameMap[name];
87                 }else { // its a duplicate.
88                         data[sequence].names += "," + oldNameMap[name];
89 //                      data[sequence].groupnumber++;
90                 }       
91                 
92                 gobble(inFASTA);
93         }
94         
95         
96         inFASTA.close();
97 }
98
99 /*******************************************************************************/
100
101 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
102         return data[seq].groupname;
103 }
104
105 /*******************************************************************************/
106
107 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
108         return data[seq].names;
109 }
110
111 /*******************************************************************************/
112
113 string FastaMap::getSequence(string name) {
114         
115         map<string,string>::iterator it = seqmap.find(name);
116         if (it == seqmap.end()) {       return "not found";             }
117         else                                    {       return it->second;              }
118         
119 }       
120
121 /*******************************************************************************/
122
123 void FastaMap::push_back(string name, string seq) {
124         
125         map<string,group>::iterator it = data.find(seq);
126         if (it == data.end()) {         //it's unique.
127                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
128                 data[seq].names = name;
129         }else { // its a duplicate.
130                 data[seq].names += "," + name;
131         }
132         seqmap[name] = seq;
133 }
134
135 /*******************************************************************************/
136
137 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
138         return data.size();
139 }
140
141 /*******************************************************************************/
142
143 void FastaMap::printNamesFile(string outFileName){ //prints data
144         try {
145                 ofstream outFile;
146                 openOutputFile(outFileName, outFile);
147                 
148                 // two column file created with groupname and them list of identical sequence names
149                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
150                         outFile << it->second.groupname << '\t' << it->second.names << endl;
151                 }
152                 outFile.close();
153         }
154         catch(exception& e) {
155                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
156                 exit(1);
157         }
158         catch(...) {
159                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
160                 exit(1);
161         }
162 }
163
164 /*******************************************************************************/
165
166 void FastaMap::printCondensedFasta(string outFileName){ //prints data
167         try {
168                 ofstream out;
169                 openOutputFile(outFileName, out);
170                 //creates a fasta file
171                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
172                         out << ">" << it->second.groupname << endl;
173                         out << it->first << endl;
174                 }
175                 out.close();
176         }
177         catch(exception& e) {
178                 cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181         catch(...) {
182                 cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }
185 }
186
187 /*******************************************************************************/
188