X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=groupmap.cpp;h=9d2736405f8e306dc8e5238f8bad1a26b92b88bb;hb=70491a12902e89b85cfa6b44a7b7fbe066ee2ac1;hp=6521128016900efe90b8f85b82a808f9f8791709;hpb=462fca6d24fd15ca035358ff70bcfae52c3281c3;p=mothur.git diff --git a/groupmap.cpp b/groupmap.cpp index 6521128..9d27364 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -12,29 +12,300 @@ /************************************************************/ GroupMap::GroupMap(string filename) { + m = MothurOut::getInstance(); groupFileName = filename; - openInputFile(filename, fileHandle); + m->openInputFile(filename, fileHandle); index = 0; } /************************************************************/ GroupMap::~GroupMap(){} +/************************************************************/ +int GroupMap::readMap() { + try { + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } +} +/************************************************************/ +int GroupMap::readDesignMap() { + try { + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } +} /************************************************************/ -void GroupMap::readMap() { - string seqName, seqGroup; - - while(fileHandle){ - fileHandle >> seqName; //read from first column - fileHandle >> seqGroup; //read from second column - - setNamesOfGroups(seqGroup); - - groupmap[seqName] = seqGroup; //store data in map - - gobble(fileHandle); - } +int GroupMap::readMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readMap"); + exit(1); + } +} +/************************************************************/ +int GroupMap::readDesignMap(string filename) { + try { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } + m->checkName(seqName); + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + pairDone = false; + } + } + } + + m->setAllGroups(namesOfGroups); + return error; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "readDesignMap"); + exit(1); + } } /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } @@ -46,30 +317,45 @@ string GroupMap::getGroup(string sequenceName) { if (it != groupmap.end()) { //sequence name was in group file return it->second; }else { + //look for it in names of groups to see if the user accidently used the wrong file + if (m->inUsersGroups(sequenceName, namesOfGroups)) { + m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); + } return "not found"; } } /************************************************************/ + void GroupMap::setGroup(string sequenceName, string groupN) { - groupmap[sequenceName] = groupN; + setNamesOfGroups(groupN); + m->checkName(sequenceName); + it = groupmap.find(sequenceName); + + if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[sequenceName] = groupN; //store data in map + seqsPerGroup[groupN]++; //increment number of seqs in that group + } } + /************************************************************/ void GroupMap::setNamesOfGroups(string seqGroup) { - int i, count; - count = 0; - for (i=0; ierrorOut(e, "GroupMap", "isValidGroup"); + exit(1); + } +} +/************************************************************/ +int GroupMap::getCopy(GroupMap* g) { + try { + vector names = g->getNamesSeqs(); + for (int i = 0; i < names.size(); i++) { + if (m->control_pressed) { break; } + string group = g->getGroup(names[i]); + setGroup(names[i], group); + } + return names.size(); + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "getCopy"); + exit(1); + } +} +/************************************************************/ +int GroupMap::getNumSeqs(string group) { + try { + + map::iterator itNum; + + itNum = seqsPerGroup.find(group); + + if (itNum == seqsPerGroup.end()) { return 0; } + + return seqsPerGroup[group]; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "getNumSeqs"); exit(1); } } @@ -99,9 +419,30 @@ vector GroupMap::getNamesSeqs(){ return names; } catch(exception& e) { - errorOut(e, "GroupMap", "getNamesSeqs"); + m->errorOut(e, "GroupMap", "getNamesSeqs"); exit(1); } } /************************************************************/ +vector GroupMap::getNamesSeqs(vector picked){ + try { + + vector names; + + for (it = groupmap.begin(); it != groupmap.end(); it++) { + //if you are belong to one the the groups in the picked vector add you + if (m->inUsersGroups(it->second, picked)) { + names.push_back(it->first); + } + } + + return names; + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "getNamesSeqs"); + exit(1); + } +} + +/************************************************************/