X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=subsample.cpp;fp=subsample.cpp;h=c55accd618991279bb3a1a0ff6d29e0d0250a81e;hb=6c2b1e530a5c0bb87040e58a3e410097acdfcc3d;hp=261297df67cfc1c5a8933a72f8bba307defbef88;hpb=f509429e06e545bde69c97cacc0eb436775bd329;p=mothur.git diff --git a/subsample.cpp b/subsample.cpp index 261297d..c55accd 100644 --- a/subsample.cpp +++ b/subsample.cpp @@ -8,62 +8,54 @@ #include "subsample.h" //********************************************************************************************************************** -Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map originalNameMap) { +Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size) { try { Tree* newTree = NULL; - map > newGroups; - vector subsampledSeqs = getSample(tmap, size, newGroups); + //remove seqs not in sample from counttable + vector Groups = ct->getNamesOfGroups(); + newCt->copy(ct); + newCt->addGroup("doNotIncludeMe"); - //remove seqs not in sample from treemap - for (map >::iterator it = newGroups.begin(); it != newGroups.end(); it++) { - for (int i = 0; i < (it->second).size(); i++) { - newTmap->addSeq((it->second)[i], it->first); - } - } - - newTree = new Tree(newTmap); - newTree->getCopy(T, originalNameMap); - - return newTree; - } - catch(exception& e) { - m->errorOut(e, "SubSample", "getSample-Tree"); - exit(1); - } -} -/********************************************************************************************************************** -Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, int size) { - try { - Tree* newTree = NULL; - - vector subsampledSeqs = getSample(tmap, size); - map sampledNameMap = deconvolute(whole, subsampledSeqs); + map doNotIncludeTotals; + vector namesSeqs = ct->getNamesOfSeqs(); + for (int i = 0; i < namesSeqs.size(); i++) { doNotIncludeTotals[namesSeqs[i]] = 0; } + + for (int i = 0; i < Groups.size(); i++) { + if (m->inUsersGroups(Groups[i], m->getGroups())) { + if (m->control_pressed) { break; } - //remove seqs not in sample from treemap - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the subsample? - int count = 0; - for (int j = 0; j < subsampledSeqs.size(); j++) { - if (tmap->namesOfSeqs[i] == subsampledSeqs[j]) { break; } //found it - count++; + int thisSize = ct->getGroupCount(Groups[i]); + + if (thisSize >= size) { + + vector names = ct->getNamesOfSeqs(Groups[i]); + vector random; + for (int j = 0; j < names.size(); j++) { + int num = ct->getGroupCount(names[j], Groups[i]); + for (int k = 0; k < num; k++) { random.push_back(j); } + } + random_shuffle(random.begin(), random.end()); + + vector sampleRandoms; sampleRandoms.resize(names.size(), 0); + for (int j = 0; j < size; j++) { sampleRandoms[random[j]]++; } + for (int j = 0; j < sampleRandoms.size(); j++) { + newCt->setAbund(names[j], Groups[i], sampleRandoms[j]); + } + sampleRandoms.clear(); sampleRandoms.resize(names.size(), 0); + for (int j = size; j < thisSize; j++) { sampleRandoms[random[j]]++; } + for (int j = 0; j < sampleRandoms.size(); j++) { doNotIncludeTotals[names[j]] += sampleRandoms[j]; } + }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } } - if (m->control_pressed) { return newTree; } - - //if you didnt find it, remove it - if (count == subsampledSeqs.size()) { - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } } - //create new tree - int numUniques = sampledNameMap.size(); - if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); } + for (map::iterator it = doNotIncludeTotals.begin(); it != doNotIncludeTotals.end(); it++) { + newCt->setAbund(it->first, "doNotIncludeMe", it->second); + } - newTree = new Tree(numUniques, tmap); //numNodes, treemap - newTree->getSubTree(T, subsampledSeqs, sampledNameMap); + newTree = new Tree(newCt); + newTree->getCopy(T, true); return newTree; } @@ -71,7 +63,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map whole, in m->errorOut(e, "SubSample", "getSample-Tree"); exit(1); } -}*/ +} //********************************************************************************************************************** //assumes whole maps dupName -> uniqueName map SubSample::deconvolute(map whole, vector& wanted) { @@ -112,100 +104,6 @@ map SubSample::deconvolute(map whole, vector SubSample::getSample(TreeMap* tMap, int size, map >& sample) { - try { - vector temp2; - sample["doNotIncludeMe"] = temp2; - - vector namesInSample; - - vector Groups = tMap->getNamesOfGroups(); - for (int i = 0; i < Groups.size(); i++) { - - if (m->inUsersGroups(Groups[i], m->getGroups())) { - if (m->control_pressed) { break; } - - vector thisGroup; thisGroup.push_back(Groups[i]); - vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); - int thisSize = thisGroupsSeqs.size(); - vector temp; - sample[Groups[i]] = temp; - - if (thisSize >= size) { - - random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); - - for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); } - for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); } - - }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } - } - } - - return namesInSample; - } - catch(exception& e) { - m->errorOut(e, "SubSample", "getSample-TreeMap"); - exit(1); - } -} - -//********************************************************************************************************************** -vector SubSample::getSample(TreeMap* tMap, int size) { - try { - vector sample; - - vector Groups = tMap->getNamesOfGroups(); - for (int i = 0; i < Groups.size(); i++) { - - if (m->inUsersGroups(Groups[i], m->getGroups())) { - if (m->control_pressed) { break; } - - vector thisGroup; thisGroup.push_back(Groups[i]); - vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); - int thisSize = thisGroupsSeqs.size(); - - if (thisSize >= size) { - - random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end()); - - for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); } - }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; } - } - } - - return sample; - } - catch(exception& e) { - m->errorOut(e, "SubSample", "getSample-TreeMap"); - exit(1); - } -} -//********************************************************************************************************************** -vector SubSample::getSample(TreeMap* tMap, vector Groups) { - try { - vector sample; - - //vector Groups = tMap->getNamesOfGroups(); - for (int i = 0; i < Groups.size(); i++) { - - if (m->control_pressed) { break; } - - vector thisGroup; thisGroup.push_back(Groups[i]); - vector thisGroupsSeqs = tMap->getNamesSeqs(thisGroup); - int thisSize = thisGroupsSeqs.size(); - - for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); } - } - - return sample; - } - catch(exception& e) { - m->errorOut(e, "SubSample", "getSample-TreeMap"); - exit(1); - } -} -//********************************************************************************************************************** vector SubSample::getSample(vector& thislookup, int size) { try {