]> git.donarmstrong.com Git - mothur.git/blob - fastamap.cpp
fixes while testing 1.33.0
[mothur.git] / fastamap.cpp
1 /*
2  *  fastamap.cpp
3  *  mothur
4  *
5  *  Created by Sarah Westcott on 1/16/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "fastamap.h"
11 #include "sequence.hpp"
12
13 /*******************************************************************************/
14
15 void FastaMap::readFastaFile(string inFileName) {
16         try {
17                 ifstream in;
18                 m->openInputFile(inFileName, in);
19                 string name, sequence, line;
20                 sequence = "";
21                 string temp;
22                 map<string, string>::iterator itName;
23                 
24                 
25                 while(!in.eof()){
26                         if (m->control_pressed) { break; }
27                         
28                         Sequence currSeq(in);
29                         name = currSeq.getName();
30                         
31                         if (name != "") {
32                                 if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
33                                 else                                            {       sequence = currSeq.getUnaligned();      }
34                                 
35                                 itName = seqmap.find(name);
36                                 if (itName == seqmap.end()) { seqmap[name] = sequence;  }
37                                 else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); }
38                                 
39                                 map<string,group>::iterator it = data.find(sequence);
40                                 if (it == data.end()) {         //it's unique.
41                                         data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
42                                         //                              data[sequence].groupnumber = 1;
43                                         data[sequence].names = name;
44                                 }else { // its a duplicate.
45                                         data[sequence].names += "," + name;
46                                         //                              data[sequence].groupnumber++;
47                                 }       
48                         }
49                         m->gobble(in);
50                 }
51                 in.close();             
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "FastaMap", "readFastaFile");
55                 exit(1);
56         }
57 }
58
59 /*******************************************************************************/
60
61 void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data
62         
63         ifstream oldNameFile;
64         m->openInputFile(oldNameFileName, oldNameFile);
65         
66         map<string,string> oldNameMap;
67         map<string, string>::iterator itName;
68         string name, list;
69         while(!oldNameFile.eof()){
70                 if (m->control_pressed) { break; }
71                 
72                 oldNameFile >> name; m->gobble(oldNameFile);
73                 oldNameFile >> list;
74                 oldNameMap[name] = list;
75                 m->gobble(oldNameFile);
76         }
77         oldNameFile.close();
78         
79         ifstream inFASTA;
80         m->openInputFile(inFastaFile, inFASTA);
81         string sequence;
82         while(!inFASTA.eof()){
83                 if (m->control_pressed) { break; }
84                 
85                 Sequence currSeq(inFASTA);
86                 name = currSeq.getName();
87                 
88                 if (name != "") {
89                         if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
90                         else                                            {       sequence = currSeq.getUnaligned();      }
91                         
92                         itName = seqmap.find(name);
93                         if (itName == seqmap.end()) { seqmap[name] = sequence;  }
94                         else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct."); m->mothurOutEndLine(); }
95                         
96                         seqmap[name] = sequence;  
97                         map<string,group>::iterator it = data.find(sequence);
98                         if (it == data.end()) {         //it's unique.
99                                 data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
100                                 //                      data[sequence].groupnumber = 1;
101                                 data[sequence].names = oldNameMap[name];
102                         }else { // its a duplicate.
103                                 data[sequence].names += "," + oldNameMap[name];
104                                 //                      data[sequence].groupnumber++;
105                         }       
106                 }
107                 m->gobble(inFASTA);
108         }
109         
110         
111         inFASTA.close();
112 }
113
114 /*******************************************************************************/
115
116 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
117         return data[seq].groupname;
118 }
119
120 /*******************************************************************************/
121
122 string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
123         return data[seq].names;
124 }
125
126 /*******************************************************************************/
127
128 string FastaMap::getSequence(string name) {
129         
130         map<string,string>::iterator it = seqmap.find(name);
131         if (it == seqmap.end()) {       return "not found";             }
132         else                                    {       return it->second;              }
133         
134 }       
135
136 /*******************************************************************************/
137
138 void FastaMap::push_back(string name, string seq) {
139         
140         map<string,group>::iterator it = data.find(seq);
141         if (it == data.end()) {         //it's unique.
142                 data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
143                 data[seq].names = name;
144         }else { // its a duplicate.
145                 data[seq].names += "," + name;
146         }
147         seqmap[name] = seq;
148 }
149
150 /*******************************************************************************/
151
152 int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences
153         return data.size();
154 }
155
156 /*******************************************************************************/
157
158 void FastaMap::printNamesFile(string outFileName){ //prints data
159         try {
160                 ofstream outFile;
161                 m->openOutputFile(outFileName, outFile);
162                 
163                 // two column file created with groupname and them list of identical sequence names
164                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
165                         if (m->control_pressed) { break; }
166                         outFile << it->second.groupname << '\t' << it->second.names << endl;
167                 }
168                 outFile.close();
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "FastaMap", "printNamesFile");
172                 exit(1);
173         }
174 }
175
176 /*******************************************************************************/
177
178 void FastaMap::printCondensedFasta(string outFileName){ //prints data
179         try {
180                 ofstream out;
181                 m->openOutputFile(outFileName, out);
182                 //creates a fasta file
183                 for (map<string,group>::iterator it = data.begin(); it != data.end(); it++) {
184                         if (m->control_pressed) { break; }
185                         out << ">" << it->second.groupname << endl;
186                         out << it->first << endl;
187                 }
188                 out.close();
189         }
190         catch(exception& e) {
191                 m->errorOut(e, "FastaMap", "printCondensedFasta");
192                 exit(1);
193         }
194 }
195
196 /*******************************************************************************/
197