X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=groupmap.cpp;h=9b8aa3f8abc87ef9ff93520a80b3da9169d406f4;hp=481fd1decfc4516eb5e1cf618c6b4d40cffbc8a9;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc diff --git a/groupmap.cpp b/groupmap.cpp index 481fd1d..9b8aa3f 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -20,60 +20,293 @@ /************************************************************/ GroupMap::~GroupMap(){} - /************************************************************/ int GroupMap::readMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } } /************************************************************/ int GroupMap::readDesignMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } +} +/************************************************************/ +int GroupMap::readMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } +} +/************************************************************/ +int GroupMap::readDesignMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } - /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } /************************************************************/ @@ -84,6 +317,10 @@ string GroupMap::getGroup(string sequenceName) { if (it != groupmap.end()) { //sequence name was in group file return it->second; }else { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(sequenceName, namesOfGroups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } return "not found"; } } @@ -92,7 +329,7 @@ string GroupMap::getGroup(string sequenceName) { void GroupMap::setGroup(string sequenceName, string groupN) { setNamesOfGroups(groupN); - + m->checkName(sequenceName); it = groupmap.find(sequenceName); if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } @@ -135,6 +372,22 @@ bool GroupMap::isValidGroup(string groupname) { } } /************************************************************/ +int GroupMap::getCopy(GroupMap* g) { + try { + vector names = g->getNamesSeqs(); + for (int i = 0; i < names.size(); i++) { + if (m->control_pressed) { break; } + string group = g->getGroup(names[i]); + setGroup(names[i], group); + } + return names.size(); + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "getCopy"); + exit(1); + } +} +/************************************************************/ int GroupMap::getNumSeqs(string group) { try { @@ -152,7 +405,66 @@ int GroupMap::getNumSeqs(string group) { exit(1); } } - +/************************************************************/ +int GroupMap::renameSeq(string oldName, string newName) { + try { + + map::iterator itName; + + itName = groupmap.find(oldName); + + if (itName == groupmap.end()) { + m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file"); + m->control_pressed = true; + return 0; + }else { + string group = itName->second; + groupmap.erase(itName); + groupmap[newName] = group; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "renameSeq"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + out << itName->first << '\t' << itName->second << endl; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out, vector userGroups) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + if (m->inUsersGroups(itName->second, userGroups)) { + out << itName->first << '\t' << itName->second << endl; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} /************************************************************/ vector GroupMap::getNamesSeqs(){ try {