X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=groupmap.cpp;h=9b8aa3f8abc87ef9ff93520a80b3da9169d406f4;hp=92a43e965044c06c2fdcaca78d50779440b301ab;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=05c52893c6c2467381fe7e7b769d86b6209af2e1 diff --git a/groupmap.cpp b/groupmap.cpp index 92a43e9..9b8aa3f 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -20,87 +20,292 @@ /************************************************************/ GroupMap::~GroupMap(){} - /************************************************************/ int GroupMap::readMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } } /************************************************************/ int GroupMap::readDesignMap() { - string seqName, seqGroup; + try { + string seqName, seqGroup; int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - it = groupmap.find(seqName); - - if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group - } - m->gobble(fileHandle); - } + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + + } + m->setAllGroups(namesOfGroups); return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } /************************************************************/ -int GroupMap::readDesignMap(string filename) { - groupFileName = filename; - m->openInputFile(filename, fileHandle); - index = 0; - string seqName, seqGroup; - int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column +int GroupMap::readMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; - if (m->control_pressed) { fileHandle.close(); return 1; } + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } - setNamesOfGroups(seqGroup); + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } +} +/************************************************************/ +int GroupMap::readDesignMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; - it = groupmap.find(seqName); + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); - if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - groupmap[seqName] = seqGroup; //store data in map - seqsPerGroup[seqGroup]++; //increment number of seqs in that group + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } } - m->gobble(fileHandle); + + m->setAllGroups(namesOfGroups); + return error; } - fileHandle.close(); - m->setAllGroups(namesOfGroups); - return error; + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } @@ -112,6 +317,10 @@ string GroupMap::getGroup(string sequenceName) { if (it != groupmap.end()) { //sequence name was in group file return it->second; }else { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(sequenceName, namesOfGroups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } return "not found"; } } @@ -120,7 +329,7 @@ string GroupMap::getGroup(string sequenceName) { void GroupMap::setGroup(string sequenceName, string groupN) { setNamesOfGroups(groupN); - + m->checkName(sequenceName); it = groupmap.find(sequenceName); if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } @@ -163,6 +372,22 @@ bool GroupMap::isValidGroup(string groupname) { } } /************************************************************/ +int GroupMap::getCopy(GroupMap* g) { + try { + vector names = g->getNamesSeqs(); + for (int i = 0; i < names.size(); i++) { + if (m->control_pressed) { break; } + string group = g->getGroup(names[i]); + setGroup(names[i], group); + } + return names.size(); + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "getCopy"); + exit(1); + } +} +/************************************************************/ int GroupMap::getNumSeqs(string group) { try { @@ -180,7 +405,66 @@ int GroupMap::getNumSeqs(string group) { exit(1); } } - +/************************************************************/ +int GroupMap::renameSeq(string oldName, string newName) { + try { + + map::iterator itName; + + itName = groupmap.find(oldName); + + if (itName == groupmap.end()) { + m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file"); + m->control_pressed = true; + return 0; + }else { + string group = itName->second; + groupmap.erase(itName); + groupmap[newName] = group; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "renameSeq"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + out << itName->first << '\t' << itName->second << endl; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out, vector userGroups) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + if (m->inUsersGroups(itName->second, userGroups)) { + out << itName->first << '\t' << itName->second << endl; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} /************************************************************/ vector GroupMap::getNamesSeqs(){ try {