X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=treemap.cpp;h=47b7cf343635e2d104db75114aa77fbe8007c466;hp=c228162aab1d511af3fc5318aaf3e5eb5b39610c;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=f55cf350ca6643f8eb070d8336e1957699a3f109 diff --git a/treemap.cpp b/treemap.cpp index c228162..47b7cf3 100644 --- a/treemap.cpp +++ b/treemap.cpp @@ -13,6 +13,9 @@ TreeMap::TreeMap(string filename) { m = MothurOut::getInstance(); + ofstream out2; + m->openOutputFileAppend(filename, out2); + out2 << endl; out2.close(); groupFileName = filename; m->openInputFile(filename, fileHandle); } @@ -21,75 +24,167 @@ TreeMap::~TreeMap(){} /************************************************************/ int TreeMap::readMap(string gf) { - - groupFileName = gf; - m->openInputFile(gf, fileHandle); - - string seqName, seqGroup; - int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column + try { + ofstream out2; + m->openOutputFileAppend(gf, out2); + out2 << endl; out2.close(); - if (m->control_pressed) { fileHandle.close(); return 1; } + groupFileName = gf; + m->openInputFile(gf, fileHandle); - setNamesOfGroups(seqGroup); + string seqName, seqGroup; + int error = 0; + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } + } + fileHandle.close(); - map::iterator itCheck = treemap.find(seqName); - if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - namesOfSeqs.push_back(seqName); - treemap[seqName].groupname = seqGroup; //store data in map + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); - it2 = seqsPerGroup.find(seqGroup); - if (it2 == seqsPerGroup.end()) { //if it's a new group - seqsPerGroup[seqGroup] = 1; - }else {//it's a group we already have - seqsPerGroup[seqGroup]++; - } + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } } - m->gobble(fileHandle); + return error; } - fileHandle.close(); - - return error; + catch(exception& e) { + m->errorOut(e, "TreeMap", "readMap"); + exit(1); + } } /************************************************************/ int TreeMap::readMap() { - string seqName, seqGroup; - int error = 0; - - while(fileHandle){ - fileHandle >> seqName; m->gobble(fileHandle); //read from first column - fileHandle >> seqGroup; //read from second column - - if (m->control_pressed) { fileHandle.close(); return 1; } - - setNamesOfGroups(seqGroup); - - map::iterator itCheck = treemap.find(seqName); - if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } - else { - namesOfSeqs.push_back(seqName); - treemap[seqName].groupname = seqGroup; //store data in map - - it2 = seqsPerGroup.find(seqGroup); - if (it2 == seqsPerGroup.end()) { //if it's a new group - seqsPerGroup[seqGroup] = 1; - }else {//it's a group we already have - seqsPerGroup[seqGroup]++; - } - } - - m->gobble(fileHandle); - } - fileHandle.close(); - - - return error; + try { + string seqName, seqGroup; + int error = 0; + + string rest = ""; + char buffer[4096]; + bool pairDone = false; + bool columnOne = true; + + while (!fileHandle.eof()) { + if (m->control_pressed) { fileHandle.close(); return 1; } + + fileHandle.read(buffer, 4096); + vector pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount()); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } + } + fileHandle.close(); + + if (rest != "") { + vector pieces = m->splitWhiteSpace(rest); + + for (int i = 0; i < pieces.size(); i++) { + if (columnOne) { seqName = pieces[i]; columnOne=false; } + else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } + + if (pairDone) { + setNamesOfGroups(seqGroup); + + map::iterator itCheck = treemap.find(seqName); + if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + namesOfSeqs.push_back(seqName); + treemap[seqName].groupname = seqGroup; //store data in map + + it2 = seqsPerGroup.find(seqGroup); + if (it2 == seqsPerGroup.end()) { //if it's a new group + seqsPerGroup[seqGroup] = 1; + }else {//it's a group we already have + seqsPerGroup[seqGroup]++; + } + } + pairDone = false; + } + } + } + + return error; + } + catch(exception& e) { + m->errorOut(e, "TreeMap", "readMap"); + exit(1); + } } /************************************************************/ void TreeMap::addSeq(string seqName, string seqGroup) { @@ -151,26 +246,6 @@ string TreeMap::getGroup(string sequenceName) { return "not found"; } -} -/************************************************************/ -void TreeMap::setIndex(string seq, int index) { - it = treemap.find(seq); - if (it != treemap.end()) { //sequence name was in group file - treemap[seq].vectorIndex = index; - }else { - treemap[seq].vectorIndex = index; - treemap[seq].groupname = "not found"; - } -} -/************************************************************/ -int TreeMap::getIndex(string seq) { - - it = treemap.find(seq); - // if it is a valid sequence name then return index - if (it != treemap.end()) { return treemap[seq].vectorIndex; } - // if not return error code - else { return -1; } - } /************************************************************/