X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=subsamplecommand.cpp;h=aebba6bbc430a6608d5fe8bb0a31ef9ebd58c4f4;hb=ee8403d4eb5760187d62b42a9cf4272de8fc0ec4;hp=d4e2c752096318748cafa69615a24627f8270aa8;hpb=57b3c96832667c1b70d4d526331f52e3d49e8237;p=mothur.git diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index d4e2c75..aebba6b 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -10,6 +10,7 @@ #include "subsamplecommand.h" #include "sharedutilities.h" #include "deconvolutecommand.h" +#include "subsample.h" //********************************************************************************************************************** vector SubSampleCommand::setParameters(){ @@ -638,34 +639,13 @@ int SubSampleCommand::getNames() { int SubSampleCommand::readNames() { try { - ifstream in; - m->openInputFile(namefile, in); - - string thisname, repnames; - map >::iterator it; - - while(!in.eof()){ - - if (m->control_pressed) { in.close(); return 0; } - - in >> thisname; m->gobble(in); //read from first column - in >> repnames; //read from second column - - it = nameMap.find(thisname); - if (it == nameMap.end()) { - - vector splitRepNames; - m->splitAtComma(repnames, splitRepNames); - - nameMap[thisname] = splitRepNames; - for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); } - - }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); } - - m->gobble(in); - } - in.close(); - + nameMap.clear(); + m->readNames(namefile, nameMap); + + //save names of all sequences + map >::iterator it; + for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } } + return 0; } @@ -801,68 +781,28 @@ int SubSampleCommand::processShared(vector& thislookup) { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + ".subsample" + m->getExtension(sharedfile); - - - ofstream out; + + SubSample sample; + vector subsampledLabels = sample.getSample(thislookup, size); + + if (m->control_pressed) { return 0; } + + ofstream out; m->openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); - int numBins = thislookup[0]->getNumBins(); - for (int i = 0; i < thislookup.size(); i++) { - int thisSize = thislookup[i]->getNumSeqs(); - - if (thisSize != size) { - - string thisgroup = thislookup[i]->getGroup(); - - OrderVector* order = new OrderVector(); - for(int p=0;pgetAbundance(p);j++){ - order->push_back(p); - } - } - random_shuffle(order->begin(), order->end()); - - SharedRAbundVector* temp = new SharedRAbundVector(numBins); - temp->setLabel(thislookup[i]->getLabel()); - temp->setGroup(thislookup[i]->getGroup()); - - delete thislookup[i]; - thislookup[i] = temp; - - - for (int j = 0; j < size; j++) { - - if (m->control_pressed) { delete order; out.close(); return 0; } - - //get random number to sample from order between 0 and thisSize-1. - //don't need this because of the random shuffle above - //int myrand = int((float)(thisSize) * (float)(rand()) / ((float)RAND_MAX+1.0)); - - int bin = order->get(j); - - int abund = thislookup[i]->getAbundance(bin); - thislookup[i]->set(bin, (abund+1), thisgroup); - } - delete order; - } - } - - //subsampling may have created some otus with no sequences in them - eliminateZeroOTUS(thislookup); - - if (m->control_pressed) { out.close(); return 0; } - + m->currentBinLabels = subsampledLabels; + thislookup[0]->printHeaders(out); for (int i = 0; i < thislookup.size(); i++) { out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t'; thislookup[i]->print(out); } - out.close(); - - //save mothurOut's binLabels to restore for next label + + + //save mothurOut's binLabels to restore for next label m->currentBinLabels = saveBinLabels; return 0; @@ -1523,64 +1463,6 @@ int SubSampleCommand::processSabund(SAbundVector*& sabund, ofstream& out) { } } //********************************************************************************************************************** -int SubSampleCommand::eliminateZeroOTUS(vector& thislookup) { - try { - - vector newLookup; - for (int i = 0; i < thislookup.size(); i++) { - SharedRAbundVector* temp = new SharedRAbundVector(); - temp->setLabel(thislookup[i]->getLabel()); - temp->setGroup(thislookup[i]->getGroup()); - newLookup.push_back(temp); - } - - //for each bin - vector newBinLabels; - string snumBins = toString(thislookup[0]->getNumBins()); - for (int i = 0; i < thislookup[0]->getNumBins(); i++) { - if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } - - //look at each sharedRabund and make sure they are not all zero - bool allZero = true; - for (int j = 0; j < thislookup.size(); j++) { - if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } - } - - //if they are not all zero add this bin - if (!allZero) { - for (int j = 0; j < thislookup.size(); j++) { - newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); - } - //if there is a bin label use it otherwise make one - string binLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { binLabel += "0"; } - } - binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } - - newBinLabels.push_back(binLabel); - } - } - - for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } - thislookup.clear(); - - thislookup = newLookup; - m->currentBinLabels = newBinLabels; - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS"); - exit(1); - } -} - -//**********************************************************************************************************************