X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=subsample.cpp;fp=subsample.cpp;h=457b7b9d5f6a1273b57e898b3adec90dff5dd9f0;hb=72e0be6b9c80009d4dbee24e8d690ad9514dc6fb;hp=70c16a8a6668649e3a8ea697d6f32ec1a738dcc9;hpb=94d43af803e4be78b2ba8049669504e854ad13be;p=mothur.git diff --git a/subsample.cpp b/subsample.cpp index 70c16a8..457b7b9 100644 --- a/subsample.cpp +++ b/subsample.cpp @@ -8,38 +8,22 @@ #include "subsample.h" //********************************************************************************************************************** -Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, int size, map originalNameMap) { +Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map originalNameMap) { try { Tree* newTree = NULL; - vector subsampledSeqs = getSample(tmap, size); - map sampledNameMap = deconvolute(whole, subsampledSeqs); + map > newGroups; + vector subsampledSeqs = getSample(tmap, size, newGroups); //remove seqs not in sample from treemap - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the subsample? - int count = 0; - string name = tmap->namesOfSeqs[i]; - for (int j = 0; j < subsampledSeqs.size(); j++) { - if (name == subsampledSeqs[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { return newTree; } - - //if you didnt find it, remove it - if (count == subsampledSeqs.size()) { - tmap->removeSeq(name); - tmap->addSeq(name, "doNotIncludeMe"); + for (map >::iterator it = newGroups.begin(); it != newGroups.end(); it++) { + for (int i = 0; i < (it->second).size(); i++) { + newTmap->addSeq((it->second)[i], it->first); } } - //create new tree - int numUniques = sampledNameMap.size(); - if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); } - - newTree = new Tree(tmap); - newTree->getCopy(T, originalNameMap, subsampledSeqs); + newTree = new Tree(newTmap); + newTree->getCopy(T, originalNameMap); return newTree; } @@ -48,7 +32,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, in exit(1); } } -//********************************************************************************************************************** +/********************************************************************************************************************** Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, int size) { try { Tree* newTree = NULL; @@ -87,7 +71,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, in m->errorOut(e, "SubSample", "getSample-Tree"); exit(1); } -} +}*/ //********************************************************************************************************************** //assumes whole maps dupName -> uniqueName map SubSample::deconvolute(map whole, vector& wanted) { @@ -126,7 +110,46 @@ map SubSample::deconvolute(map whole, vectorerrorOut(e, "SubSample", "deconvolute"); exit(1); } +} +//********************************************************************************************************************** +vector SubSample::getSample(TreeMap* tMap, int size, map >& sample) { + try { + vector temp2; + sample["doNotIncludeMe"] = temp2; + + vector namesInSample; + + vector Groups = tMap->getNamesOfGroups(); + for (int i = 0; i < Groups.size(); i++) { + + if (m->inUsersGroups(Groups[i], m->getGroups())) { + if (m->control_pressed) { break; } + + vector thisGroup; thisGroup.push_back(Groups[i]); + vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); + int thisSize = thisGroupsSeqs.size(); + vector temp; + sample[Groups[i]] = temp; + + if (thisSize >= size) { + + random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); + + for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); } + for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); } + + }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } + } + } + + return namesInSample; + } + catch(exception& e) { + m->errorOut(e, "SubSample", "getSample-TreeMap"); + exit(1); + } } + //********************************************************************************************************************** vector SubSample::getSample(TreeMap* tMap, int size) { try {