*
*/
-#include "sequenceParser.h"
+#include "sequenceparser.h"
/************************************************************/
m->openInputFile(fastaFile, in);
map<string, string> seqName; //stores name -> sequence string so we can make new "unique" sequences when we parse the name file
+ int fastaCount = 0;
while (!in.eof()) {
if (m->control_pressed) { break; }
Sequence seq(in); m->gobble(in);
+ fastaCount++;
+ if (m->debug) { if((fastaCount) % 1000 == 0){ m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + "\n."); } }
- if (seq.getName() != "") {
+ if (seq.getName() != "") {
string group = groupMap->getGroup(seq.getName());
if (group == "not found") { error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine(); }
string first, second;
int countName = 0;
+ set<string> thisnames1;
+
while(!inName.eof()) {
if (m->control_pressed) { break; }
it = splitMap.find(group);
if (it != splitMap.end()) { //adding seqs to this group
(it->second) += "," + names[i];
+ thisnames1.insert(names[i]);
countName++;
}else { //first sighting of this group
splitMap[group] = names[i];
countName++;
+ thisnames1.insert(names[i]);
//is this seq in the fasta file?
if (i != 0) { //if not then we need to add a duplicate sequence to the seqs for this group so the new "fasta" and "name" files will match
inName.close();
if (error == 1) { m->control_pressed = true; }
-
+
if (countName != (groupMap->getNumSeqs())) {
+ vector<string> groupseqsnames = groupMap->getNamesSeqs();
+
+ for (int i = 0; i < groupseqsnames.size(); i++) {
+ set<string>::iterator itnamesfile = thisnames1.find(groupseqsnames[i]);
+ if (itnamesfile == thisnames1.end()){
+ cout << "missing name " + groupseqsnames[i] << '\t' << allSeqsMap[groupseqsnames[i]] << endl;
+ }
+ }
+
m->mothurOutEndLine();
m->mothurOut("[ERROR]: Your name file contains " + toString(countName) + " valid sequences, and your groupfile contains " + toString(groupMap->getNumSeqs()) + ", please correct.");
m->mothurOutEndLine();
m->mothurOut("[ERROR]: No sequences available for group " + g + ", please correct."); m->mothurOutEndLine();
}else {
seqForThisGroup = it->second;
+ if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " fasta file has " + toString(seqForThisGroup.size()) + " sequences."); }
}
return seqForThisGroup;
}
}else {
+ //m->mothurOut("Group " + g + " contains " + toString(seqForThisGroup.size()) + " unique seqs.\n");
for (int i = 0; i < seqForThisGroup.size(); i++) {
if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
m->mothurOut("[ERROR]: No nameMap available for group " + g + ", please correct."); m->mothurOutEndLine();
}else {
nameMapForThisGroup = it->second;
+ if (m->debug) { m->mothurOut("[DEBUG]: group " + g + " name file has " + toString(nameMapForThisGroup.size()) + " unique sequences."); }
}
return nameMapForThisGroup;