2 // sequencecountparser.cpp
5 // Created by Sarah Westcott on 8/7/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "sequencecountparser.h"
11 /************************************************************/
12 SequenceCountParser::SequenceCountParser(string countfile, string fastafile) {
15 m = MothurOut::getInstance();
18 CountTable countTable;
19 countTable.readTable(countfile, true, false);
22 namesOfGroups = countTable.getNamesOfGroups();
23 for (int i = 0; i < namesOfGroups.size(); i++) {
24 vector<Sequence> temp;
25 map<string, int> tempMap;
26 seqs[namesOfGroups[i]] = temp;
27 countTablePerGroup[namesOfGroups[i]] = tempMap;
30 //read fasta file making sure each sequence is in the group file
32 m->openInputFile(fastafile, in);
37 if (m->control_pressed) { break; }
39 Sequence seq(in); m->gobble(in);
41 if (m->debug) { if((fastaCount) % 1000 == 0){ m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n."); } }
43 if (seq.getName() != "") {
45 allSeqsMap[seq.getName()] = seq.getName();
46 vector<int> groupCounts = countTable.getGroupCounts(seq.getName());
48 for (int i = 0; i < namesOfGroups.size(); i++) {
49 if (groupCounts[i] != 0) {
50 seqs[namesOfGroups[i]].push_back(seq);
51 countTablePerGroup[namesOfGroups[i]][seq.getName()] = groupCounts[i];
59 m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
63 /************************************************************/
64 SequenceCountParser::SequenceCountParser(string fastafile, CountTable& countTable) {
67 m = MothurOut::getInstance();
70 if (countTable.hasGroupInfo()) {
71 namesOfGroups = countTable.getNamesOfGroups();
72 for (int i = 0; i < namesOfGroups.size(); i++) {
73 vector<Sequence> temp;
74 map<string, int> tempMap;
75 seqs[namesOfGroups[i]] = temp;
76 countTablePerGroup[namesOfGroups[i]] = tempMap;
79 //read fasta file making sure each sequence is in the group file
81 m->openInputFile(fastafile, in);
86 if (m->control_pressed) { break; }
88 Sequence seq(in); m->gobble(in);
90 if (m->debug) { if((fastaCount) % 1000 == 0){ m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n."); } }
92 if (seq.getName() != "") {
94 allSeqsMap[seq.getName()] = seq.getName();
95 vector<int> groupCounts = countTable.getGroupCounts(seq.getName());
97 for (int i = 0; i < namesOfGroups.size(); i++) {
98 if (groupCounts[i] != 0) {
99 seqs[namesOfGroups[i]].push_back(seq);
100 countTablePerGroup[namesOfGroups[i]][seq.getName()] = groupCounts[i];
106 }else { m->control_pressed = true; m->mothurOut("[ERROR]: cannot parse fasta file by group with a count table that does not include group data, please correct.\n"); }
109 catch(exception& e) {
110 m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
114 /************************************************************/
115 SequenceCountParser::~SequenceCountParser(){ }
116 /************************************************************/
117 int SequenceCountParser::getNumGroups(){ return namesOfGroups.size(); }
118 /************************************************************/
119 vector<string> SequenceCountParser::getNamesOfGroups(){ return namesOfGroups; }
120 /************************************************************/
121 int SequenceCountParser::getNumSeqs(string g){
123 map<string, vector<Sequence> >::iterator it;
127 if(it == seqs.end()) {
128 m->mothurOut("[ERROR]: " + g + " is not a valid group, please correct."); m->mothurOutEndLine();
130 num = (it->second).size();
135 catch(exception& e) {
136 m->errorOut(e, "SequenceCountParser", "getNumSeqs");
140 /************************************************************/
141 vector<Sequence> SequenceCountParser::getSeqs(string g){
143 map<string, vector<Sequence> >::iterator it;
144 vector<Sequence> seqForThisGroup;
147 if(it == seqs.end()) {
148 m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
150 seqForThisGroup = it->second;
151 if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " fasta file has " + toString(seqForThisGroup.size()) + " sequences."); }
154 return seqForThisGroup;
156 catch(exception& e) {
157 m->errorOut(e, "SequenceCountParser", "getSeqs");
161 /************************************************************/
162 int SequenceCountParser::getSeqs(string g, string filename, bool uchimeFormat=false){
164 map<string, vector<Sequence> >::iterator it;
165 vector<Sequence> seqForThisGroup;
166 vector<seqPriorityNode> nameVector;
169 if(it == seqs.end()) {
170 m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
174 m->openOutputFile(filename, out);
176 seqForThisGroup = it->second;
179 // format should look like
180 //>seqName /ab=numRedundantSeqs/
183 map<string, int> countForThisGroup = getCountTable(g);
184 map<string, int>::iterator itCount;
187 for (int i = 0; i < seqForThisGroup.size(); i++) {
188 itCount = countForThisGroup.find(seqForThisGroup[i].getName());
190 if (itCount == countForThisGroup.end()){
192 m->mothurOut("[ERROR]: " + seqForThisGroup[i].getName() + " is in your fastafile, but is not in your count file, please correct."); m->mothurOutEndLine();
194 seqPriorityNode temp(itCount->second, seqForThisGroup[i].getAligned(), seqForThisGroup[i].getName());
195 nameVector.push_back(temp);
199 if (error == 1) { out.close(); m->mothurRemove(filename); return 1; }
201 //sort by num represented
202 sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
204 //print new file in order of
205 for (int i = 0; i < nameVector.size(); i++) {
207 if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
209 out << ">" << nameVector[i].name << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl; //
213 //m->mothurOut("Group " + g + " contains " + toString(seqForThisGroup.size()) + " unique seqs.\n");
214 for (int i = 0; i < seqForThisGroup.size(); i++) {
216 if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
218 seqForThisGroup[i].printSequence(out);
226 catch(exception& e) {
227 m->errorOut(e, "SequenceCountParser", "getSeqs");
232 /************************************************************/
233 map<string, int> SequenceCountParser::getCountTable(string g){
235 map<string, map<string, int> >::iterator it;
236 map<string, int> countForThisGroup;
238 it = countTablePerGroup.find(g);
239 if(it == countTablePerGroup.end()) {
240 m->mothurOut("[ERROR]: No countTable available for group " + g + ", please correct."); m->mothurOutEndLine();
242 countForThisGroup = it->second;
243 if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " count file has " + toString(countForThisGroup.size()) + " unique sequences."); }
246 return countForThisGroup;
248 catch(exception& e) {
249 m->errorOut(e, "SequenceCountParser", "getCountTable");
253 /************************************************************/
254 int SequenceCountParser::getCountTable(string g, string filename){
256 map<string, map<string, int> >::iterator it;
257 map<string, int> countForThisGroup;
259 it = countTablePerGroup.find(g);
260 if(it == countTablePerGroup.end()) {
261 m->mothurOut("[ERROR]: No countTable available for group " + g + ", please correct."); m->mothurOutEndLine();
263 countForThisGroup = it->second;
266 m->openOutputFile(filename, out);
267 out << "Representative_Sequence\ttotal\t" << g << endl;
269 for (map<string, int>::iterator itFile = countForThisGroup.begin(); itFile != countForThisGroup.end(); itFile++) {
271 if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
273 out << itFile->first << '\t' << itFile->second << '\t' << itFile->second << endl;
281 catch(exception& e) {
282 m->errorOut(e, "SequenceParser", "getCountTable");
286 /************************************************************/