5 * Created by westcott on 9/9/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "sequenceParser.h"
13 /************************************************************/
// SequenceParser constructor taking a group file, a fasta file and a name file.
// As far as the visible statements show, it:
//   1. loads groupFile into a newly allocated GroupMap; a readMap() result of 1 sets m->control_pressed,
//   2. creates an empty entry in seqs and in nameMapPerGroup for every group name,
//   3. reads Sequence records from fastaFile, looks up the group of each named record, and stores
//      records in seqs and their aligned strings in seqName,
//   4. reads pairs of columns from nameFile, splits the second column on ',' and regroups the
//      names by group into nameMapPerGroup,
//   5. reports an error when countName differs from groupMap->getNumSeqs().
// Problems are printed through m->mothurOut with an "[ERROR]: " prefix and lead to
// m->control_pressed being set; caught exceptions are handed to m->errorOut.
// NOTE(review): several declarations, loop headers, else branches and closing braces are not
// visible in this listing, so the exact nesting of the statements below should be confirmed.
14 SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFile) {
17 m = MothurOut::getInstance();
21 groupMap = new GroupMap(groupFile);
22 error = groupMap->readMap();
// a readMap() result of 1 is treated as a failure and requests a stop
24 if (error == 1) { m->control_pressed = true; }
// start every known group with an empty sequence list and an empty name map
27 vector<string> namesOfGroups = groupMap->getNamesOfGroups();
28 for (int i = 0; i < namesOfGroups.size(); i++) {
29 vector<Sequence> temp;
30 map<string, string> tempMap;
31 seqs[namesOfGroups[i]] = temp;
32 nameMapPerGroup[namesOfGroups[i]] = tempMap;
35 //read fasta file making sure each sequence is in the group file
37 m->openInputFile(fastaFile, in);
39 map<string, string> seqName; //stores name -> sequence string so we can make new "unique" sequences when we parse the name file
// stop reading once a stop has been requested
42 if (m->control_pressed) { break; }
// build the next Sequence from the stream; gobble() presumably skips trailing whitespace - confirm
44 Sequence seq(in); m->gobble(in);
// records with an empty name are ignored
46 if (seq.getName() != "") {
// "not found" is the value getGroup() is compared against for names missing from the group file
48 string group = groupMap->getGroup(seq.getName());
49 if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
// NOTE(review): whether the next two statements sit in an else branch of the check above is not visible here - confirm
51 seqs[group].push_back(seq);
52 seqName[seq.getName()] = seq.getAligned();
58 if (error == 1) { m->control_pressed = true; }
62 m->openInputFile(nameFile, inName);
66 while(!inName.eof()) {
68 if (m->control_pressed) { break; }
// each name-file line is read as two whitespace-separated columns
70 inName >> first; m->gobble(inName);
71 inName >> second; m->gobble(inName);
// the second column is a comma-separated list of names
74 m->splitAtChar(second, names, ',');
76 //get aligned string for these seqs from the fasta file
77 string alignedString = "";
// only the first name of the list is looked up in the fasta-derived seqName map
78 map<string, string>::iterator itAligned = seqName.find(names[0]);
79 if (itAligned == seqName.end()) {
80 error = 1; m->mothurOut("[ERROR]: " + names[0] + " is in your name file and not in your fasta file, please correct."); m->mothurOutEndLine();
// NOTE(review): presumably the else branch of the lookup check above - the else line is not visible here
82 alignedString = itAligned->second;
85 //separate by group - parse one line in name file
86 map<string, string> splitMap; //group -> name1,name2,...
87 map<string, string>::iterator it;
88 for (int i = 0; i < names.size(); i++) {
90 string group = groupMap->getGroup(names[i]);
91 if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your name file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
94 it = splitMap.find(group);
95 if (it != splitMap.end()) { //adding seqs to this group
// extend the comma-separated list already stored for this group
96 (it->second) += "," + names[i];
98 }else { //first sighting of this group
99 splitMap[group] = names[i];
102 //is this seq in the fasta file?
103 if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match
104 Sequence tempSeq(names[i], alignedString); //get the first guys sequence string since he's in the fasta file.
105 seqs[group].push_back(tempSeq);
112 //fill nameMapPerGroup - holds all lines in namefile separated by group
113 for (it = splitMap.begin(); it != splitMap.end(); it++) {
// firstName collects the characters of the list that are not ','
// NOTE(review): a break at the first comma is presumably present but not visible here - confirm
115 string firstName = "";
116 for(int i = 0; i < (it->second).length(); i++) {
117 if (((it->second)[i]) != ',') {
118 firstName += ((it->second)[i]);
122 //group1 -> seq1 -> seq1,seq2,seq3
123 nameMapPerGroup[it->first][firstName] = it->second;
129 if (error == 1) { m->control_pressed = true; }
// NOTE(review): the declaration and increments of countName are not visible here - confirm how it is counted
131 if (countName != (groupMap->getNumSeqs())) {
132 m->mothurOutEndLine();
133 m->mothurOut("[ERROR]: Your name file contains " + toString(countName) + " valid sequences, and your groupfile contains " + toString(groupMap->getNumSeqs()) + ", please correct.");
134 m->mothurOutEndLine();
135 m->control_pressed = true;
// any std::exception is reported with the class and function name
139 catch(exception& e) {
140 m->errorOut(e, "SequenceParser", "SequenceParser");
144 /************************************************************/
// SequenceParser constructor taking only a group file and a fasta file.
// As far as the visible statements show, it:
//   1. loads groupFile into a newly allocated GroupMap; a readMap() result of 1 sets m->control_pressed,
//   2. creates an empty entry in seqs for every group name,
//   3. reads Sequence records from fastaFile and, for each named record whose group is found,
//      appends it to that group's list in seqs and increments count,
//   4. reports an error when count differs from groupMap->getNumSeqs().
// Problems are printed through m->mothurOut with an "[ERROR]: " prefix and lead to
// m->control_pressed being set; caught exceptions are handed to m->errorOut.
// NOTE(review): declarations, the read-loop header and closing braces are not visible in this
// listing, so the exact nesting of the statements below should be confirmed.
145 SequenceParser::SequenceParser(string groupFile, string fastaFile) {
148 m = MothurOut::getInstance();
152 groupMap = new GroupMap(groupFile);
153 error = groupMap->readMap();
// a readMap() result of 1 is treated as a failure and requests a stop
155 if (error == 1) { m->control_pressed = true; }
// start every known group with an empty sequence list
158 vector<string> namesOfGroups = groupMap->getNamesOfGroups();
159 for (int i = 0; i < namesOfGroups.size(); i++) {
160 vector<Sequence> temp;
161 seqs[namesOfGroups[i]] = temp;
164 //read fasta file making sure each sequence is in the group file
166 m->openInputFile(fastaFile, in);
// stop reading once a stop has been requested
171 if (m->control_pressed) { break; }
// build the next Sequence from the stream; gobble() presumably skips trailing whitespace - confirm
173 Sequence seq(in); m->gobble(in);
// records with an empty name are ignored
175 if (seq.getName() != "") {
// "not found" is the value getGroup() is compared against for names missing from the group file
177 string group = groupMap->getGroup(seq.getName());
178 if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
// only sequences with a known group are stored and counted
179 else { seqs[group].push_back(seq); count++; }
184 if (error == 1) { m->control_pressed = true; }
// the number of stored sequences must equal the number of sequences in the group file
186 if (count != (groupMap->getNumSeqs())) {
187 m->mothurOutEndLine();
188 m->mothurOut("[ERROR]: Your fasta file contains " + toString(count) + " valid sequences, and your groupfile contains " + toString(groupMap->getNumSeqs()) + ", please correct.");
// fewer fasta sequences than group entries: append a hint about a missing name file
189 if (count < (groupMap->getNumSeqs())) { m->mothurOut(" Did you forget to include the name file?"); }
190 m->mothurOutEndLine();
191 m->control_pressed = true;
// any std::exception is reported with the class and function name
195 catch(exception& e) {
196 m->errorOut(e, "SequenceParser", "SequenceParser");
200 /************************************************************/
201 SequenceParser::~SequenceParser(){ delete groupMap; }
202 /************************************************************/
203 int SequenceParser::getNumGroups(){ return groupMap->getNumGroups(); }
204 /************************************************************/
205 vector<string> SequenceParser::getNamesOfGroups(){ return groupMap->getNamesOfGroups(); }
206 /************************************************************/
207 bool SequenceParser::isValidGroup(string g){ return groupMap->isValidGroup(g); }
208 /************************************************************/
209 string SequenceParser::getGroup(string g){ return groupMap->getGroup(g); }
210 /************************************************************/
// getNumSeqs: reports the size of the sequence list stored in seqs for group g.
// When the iterator equals seqs.end(), an "is not a valid group" error is printed instead.
// Caught exceptions are handed to m->errorOut.
// NOTE(review): the declaration of num, the assignment of `it` (presumably seqs.find(g)),
// the else line and the return statement are not visible in this listing - confirm.
211 int SequenceParser::getNumSeqs(string g){
213 map<string, vector<Sequence> >::iterator it;
// end() means there is no entry for g in seqs
217 if(it == seqs.end()) {
218 m->mothurOut("[ERROR]: " + g + " is not a valid group, please correct."); m->mothurOutEndLine();
// number of Sequence objects stored for this group
220 num = (it->second).size();
225 catch(exception& e) {
226 m->errorOut(e, "SequenceParser", "getNumSeqs");
230 /************************************************************/
// getSeqs: returns a copy of the sequence list stored in seqs for group g.
// seqForThisGroup starts out empty; when the iterator equals seqs.end(), a
// "No sequences available" error is printed.
// Caught exceptions are handed to m->errorOut.
// NOTE(review): the assignment of `it` (presumably seqs.find(g)) and the else line are
// not visible in this listing - confirm.
231 vector<Sequence> SequenceParser::getSeqs(string g){
233 map<string, vector<Sequence> >::iterator it;
234 vector<Sequence> seqForThisGroup;
// end() means there is no entry for g in seqs
237 if(it == seqs.end()) {
238 m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
// copy the stored list into the local that is returned
240 seqForThisGroup = it->second;
243 return seqForThisGroup;
245 catch(exception& e) {
246 m->errorOut(e, "SequenceParser", "getSeqs");
250 /************************************************************/
// getNameMap: returns a copy of the name map stored in nameMapPerGroup for group g.
// nameMapForThisGroup starts out empty; when g has no entry, a "No nameMap available"
// error is printed.
// Caught exceptions are handed to m->errorOut.
// NOTE(review): the else line between the error report and the copy is not visible in
// this listing - confirm.
251 map<string, string> SequenceParser::getNameMap(string g){
253 map<string, map<string, string> >::iterator it;
254 map<string, string> nameMapForThisGroup;
// look up the per-group name map for g
256 it = nameMapPerGroup.find(g);
257 if(it == nameMapPerGroup.end()) {
258 m->mothurOut("[ERROR]: No nameMap available for group " + g + ", please correct."); m->mothurOutEndLine();
// copy the stored map into the local that is returned
260 nameMapForThisGroup = it->second;
263 return nameMapForThisGroup;
265 catch(exception& e) {
266 m->errorOut(e, "SequenceParser", "getNameMap");
270 /************************************************************/