X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=splitmatrix.cpp;h=b7a3b49362dbd7c67f54649ba1b846a5c7cf6c79;hb=6446d2e9713a95db5f772135b7aa3387f7ebf7bb;hp=9e53c51a8f1b6fbba8e755279e68993fb7f03fa5;hpb=a0f87c2ae6414af28d4e70b1e6830401eac21bef;p=mothur.git diff --git a/splitmatrix.cpp b/splitmatrix.cpp index 9e53c51..b7a3b49 100644 --- a/splitmatrix.cpp +++ b/splitmatrix.cpp @@ -24,7 +24,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri } /***********************************************************************/ -SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p){ +SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p, string output){ m = MothurOut::getInstance(); fastafile = ffile; namefile = name; @@ -32,6 +32,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string cutoff = c; method = t; processors = p; + outputDir = output; } /***********************************************************************/ @@ -86,12 +87,12 @@ int SplitMatrix::splitClassify(){ PhyloTree* phylo = new PhyloTree(); ifstream in; - openInputFile(taxFile, in); + m->openInputFile(taxFile, in); //read in users taxonomy file and add sequences to tree string seqname, tax; while(!in.eof()){ - in >> seqname >> tax; gobble(in); + in >> seqname >> tax; m->gobble(in); phylo->addSeqToTree(seqname, tax); } in.close(); @@ -145,12 +146,12 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG } ifstream in; - openInputFile(fastafile, in); + m->openInputFile(fastafile, in); //parse fastafile ofstream outFile; while (!in.eof()) { - Sequence query(in); gobble(in); + Sequence query(in); m->gobble(in); if (query.getName() != "") { it = seqGroup.find(query.getName()); @@ -159,11 +160,11 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG if (namefile == "") { names.insert(query.getName()); } if (it != seqGroup.end()) { //not singleton - openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile); + m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile); query.printSequence(outFile); outFile.close(); - copyGroups.erase(it); + copyGroups.erase(query.getName()); } } } @@ -180,7 +181,7 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG //process each distance file for (int i = 0; i < numGroups; i++) { - string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors); + string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(cutoff); Command* command = new DistanceCommand(options); command->execute(); @@ -194,22 +195,22 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG singleton = namefile + ".extra.temp"; ofstream remainingNames; - openOutputFile(singleton, remainingNames); + m->openOutputFile(singleton, remainingNames); bool wroteExtra = false; ifstream bigNameFile; - openInputFile(namefile, bigNameFile); + m->openInputFile(namefile, bigNameFile); string name, nameList; while(!bigNameFile.eof()){ - bigNameFile >> name >> nameList; gobble(bigNameFile); + bigNameFile >> name >> nameList; m->gobble(bigNameFile); //did this sequence get assigned a group it = seqGroup.find(name); if (it != seqGroup.end()) { - openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); + m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); outFile << name << '\t' << nameList << endl; outFile.close(); }else{ @@ -219,30 +220,42 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG } bigNameFile.close(); - remainingNames.close(); - if (!wroteExtra) { - remove(singleton.c_str()); - singleton = "none"; - } - for(int i=0;ihasPath(fastafile); } + string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; //if there are valid distances ifstream fileHandle; fileHandle.open(tempDistFile.c_str()); if(fileHandle) { - gobble(fileHandle); - if (!fileHandle.eof()) { //check for blank file + m->gobble(fileHandle); + if (!fileHandle.eof()) { //check for blank file - this could occur if all dists in group are above cutoff map temp; temp[tempDistFile] = tempNameFile; dists.push_back(temp); + }else { + ifstream in; + m->openInputFile(tempNameFile, in); + + while(!in.eof()) { + in >> name >> nameList; m->gobble(in); + wroteExtra = true; + remainingNames << name << '\t' << nameList << endl; + } + in.close(); + remove(tempNameFile.c_str()); } } fileHandle.close(); } + remainingNames.close(); + if (!wroteExtra) { + remove(singleton.c_str()); + singleton = "none"; + } + if (m->control_pressed) { for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); } return 0; @@ -259,7 +272,7 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup map::iterator it2; ifstream dFile; - openInputFile(distFile, dFile); + m->openInputFile(distFile, dFile); ofstream outFile; for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case @@ -282,7 +295,7 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str()); } } - dFile >> seqA >> seqB >> dist; gobble(dFile); + dFile >> seqA >> seqB >> dist; m->gobble(dFile); //if both sequences are in the same group then they are within the cutoff it = seqGroup.find(seqA); @@ -291,7 +304,7 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons if (it->second == it2->second) { //they are from the same group so add the distance if (numOutputs[it->second] > 30) { - openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile); + m->openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile); outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl; outFile.close(); outputs[it->second] = ""; @@ -311,7 +324,7 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup //write out any remaining buffers if (numOutputs[i] > 0) { - openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile); + m->openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile); outFile << outputs[i]; outFile.close(); outputs[i] = ""; @@ -321,23 +334,23 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup } ifstream bigNameFile; - openInputFile(namefile, bigNameFile); + m->openInputFile(namefile, bigNameFile); singleton = namefile + ".extra.temp"; ofstream remainingNames; - openOutputFile(singleton, remainingNames); + m->openOutputFile(singleton, remainingNames); bool wroteExtra = false; string name, nameList; while(!bigNameFile.eof()){ - bigNameFile >> name >> nameList; gobble(bigNameFile); + bigNameFile >> name >> nameList; m->gobble(bigNameFile); //did this sequence get assigned a group it = seqGroup.find(name); if (it != seqGroup.end()) { - openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); + m->openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile); outFile << name << '\t' << nameList << endl; outFile.close(); }else{ @@ -358,10 +371,10 @@ int SplitMatrix::splitDistanceFileByTax(map& seqGroup, int numGroup dists.push_back(temp); }else{ ifstream in; - openInputFile(tempNameFile, in); + m->openInputFile(tempNameFile, in); while(!in.eof()) { - in >> name >> nameList; gobble(in); + in >> name >> nameList; m->gobble(in); wroteExtra = true; remainingNames << name << '\t' << nameList << endl; } @@ -407,7 +420,7 @@ int SplitMatrix::splitDistanceLarge(){ ofstream outFile; ifstream dFile; - openInputFile(distFile, dFile); + m->openInputFile(distFile, dFile); while(dFile){ string seqA, seqB; @@ -621,7 +634,7 @@ int SplitMatrix::splitDistanceLarge(){ } } } - gobble(dFile); + m->gobble(dFile); } dFile.close(); @@ -659,7 +672,7 @@ int SplitMatrix::splitNames(vector >& groups){ while(bigNameFile){ bigNameFile >> name >> nameList; nameMap[name] = nameList; - gobble(bigNameFile); + m->gobble(bigNameFile); } bigNameFile.close(); @@ -728,7 +741,7 @@ int SplitMatrix::splitDistanceRAM(){ int numGroups = 0; ifstream dFile; - openInputFile(distFile, dFile); + m->openInputFile(distFile, dFile); while(dFile){ string seqA, seqB; @@ -811,7 +824,7 @@ int SplitMatrix::splitDistanceRAM(){ } } } - gobble(dFile); + m->gobble(dFile); } dFile.close();