X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=subsample.cpp;h=261297df67cfc1c5a8933a72f8bba307defbef88;hb=2c97dd48b8e27ee0a6a86c7a082f4c504c3357c6;hp=b1e78a44a0a2e5b5cd31e7c38e27710603ee1578;hpb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;p=mothur.git diff --git a/subsample.cpp b/subsample.cpp index b1e78a4..261297d 100644 --- a/subsample.cpp +++ b/subsample.cpp @@ -7,8 +7,32 @@ // #include "subsample.h" - //********************************************************************************************************************** +Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map originalNameMap) { + try { + Tree* newTree = NULL; + + map > newGroups; + vector subsampledSeqs = getSample(tmap, size, newGroups); + + //remove seqs not in sample from treemap + for (map >::iterator it = newGroups.begin(); it != newGroups.end(); it++) { + for (int i = 0; i < (it->second).size(); i++) { + newTmap->addSeq((it->second)[i], it->first); + } + } + + newTree = new Tree(newTmap); + newTree->getCopy(T, originalNameMap); + + return newTree; + } + catch(exception& e) { + m->errorOut(e, "SubSample", "getSample-Tree"); + exit(1); + } +} +/********************************************************************************************************************** Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, int size) { try { Tree* newTree = NULL; @@ -47,7 +71,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, in m->errorOut(e, "SubSample", "getSample-Tree"); exit(1); } -} +}*/ //********************************************************************************************************************** //assumes whole maps dupName -> uniqueName map SubSample::deconvolute(map whole, vector& wanted) { @@ -86,13 +110,83 @@ map SubSample::deconvolute(map whole, vectorerrorOut(e, "SubSample", "deconvolute"); exit(1); } +} +//********************************************************************************************************************** +vector SubSample::getSample(TreeMap* tMap, int size, map >& sample) { + try { + vector temp2; + sample["doNotIncludeMe"] = temp2; + + vector namesInSample; + + vector Groups = tMap->getNamesOfGroups(); + for (int i = 0; i < Groups.size(); i++) { + + if (m->inUsersGroups(Groups[i], m->getGroups())) { + if (m->control_pressed) { break; } + + vector thisGroup; thisGroup.push_back(Groups[i]); + vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); + int thisSize = thisGroupsSeqs.size(); + vector temp; + sample[Groups[i]] = temp; + + if (thisSize >= size) { + + random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); + + for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); } + for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); } + + }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } + } + } + + return namesInSample; + } + catch(exception& e) { + m->errorOut(e, "SubSample", "getSample-TreeMap"); + exit(1); + } } + //********************************************************************************************************************** vector SubSample::getSample(TreeMap* tMap, int size) { try { vector sample; vector Groups = tMap->getNamesOfGroups(); + for (int i = 0; i < Groups.size(); i++) { + + if (m->inUsersGroups(Groups[i], m->getGroups())) { + if (m->control_pressed) { break; } + + vector thisGroup; thisGroup.push_back(Groups[i]); + vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); + int thisSize = thisGroupsSeqs.size(); + + if (thisSize >= size) { + + random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); + + for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); } + }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } + } + } + + return sample; + } + catch(exception& e) { + m->errorOut(e, "SubSample", "getSample-TreeMap"); + exit(1); + } +} +//********************************************************************************************************************** +vector SubSample::getSample(TreeMap* tMap, vector Groups) { + try { + vector sample; + + //vector Groups = tMap->getNamesOfGroups(); for (int i = 0; i < Groups.size(); i++) { if (m->control_pressed) { break; } @@ -100,13 +194,8 @@ vector SubSample::getSample(TreeMap* tMap, int size) { vector thisGroup; thisGroup.push_back(Groups[i]); vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); int thisSize = thisGroupsSeqs.size(); - - if (thisSize >= size) { - random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); - - for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); } - }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } + for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); } } return sample; @@ -161,7 +250,7 @@ vector SubSample::getSample(vector& thislookup, int //subsampling may have created some otus with no sequences in them eliminateZeroOTUS(thislookup); - + if (m->control_pressed) { return m->currentBinLabels; } //save mothurOut's binLabels to restore for next label @@ -233,8 +322,51 @@ int SubSample::eliminateZeroOTUS(vector& thislookup) { exit(1); } } +//********************************************************************************************************************** +int SubSample::getSample(SAbundVector*& sabund, int size) { + try { + + OrderVector* order = new OrderVector(); + *order = sabund->getOrderVector(NULL); + + int numBins = order->getNumBins(); + int thisSize = order->getNumSeqs(); + + if (thisSize > size) { + random_shuffle(order->begin(), order->end()); + + RAbundVector* rabund = new RAbundVector(numBins); + rabund->setLabel(order->getLabel()); - + for (int j = 0; j < size; j++) { + + if (m->control_pressed) { delete order; delete rabund; return 0; } + + int bin = order->get(j); + + int abund = rabund->get(bin); + rabund->set(bin, (abund+1)); + } + + delete sabund; + sabund = new SAbundVector(); + *sabund = rabund->getSAbundVector(); + delete rabund; + + }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->control_pressed = true; } + + if (m->control_pressed) { return 0; } + + delete order; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "getSample"); + exit(1); + } +} //**********************************************************************************************************************