//
#include "subsample.h"
-
+//**********************************************************************************************************************
+// Builds a new Tree restricted to a random subsample of the sequences in tmap.
+//
+// T               - source tree handed to Tree::getCopy for the new tree.
+// tmap            - tree map to sample from; it is modified in place: every sequence that is not
+//                   part of the subsample is re-registered under the group "doNotIncludeMe".
+// whole           - name map forwarded to deconvolute() together with the sampled names.
+// size            - sample size forwarded to getSample(tmap, size).
+// originalNameMap - name map forwarded to Tree::getCopy.
+//
+// Returns a tree allocated with new (NOTE(review): presumably owned by the caller - confirm),
+// or NULL when m->control_pressed is seen while the tree map is being relabelled.
+Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size, map<string, string> originalNameMap) {
+    try {
+        Tree* newTree = NULL;
+
+        vector<string> subsampledSeqs = getSample(tmap, size);
+
+        // deconvolute() takes the list by non-const reference and may rewrite it, so the call is
+        // kept even though its returned map is not needed here.
+        deconvolute(whole, subsampledSeqs);
+
+        // Walk a snapshot of the names rather than the live container: removeSeq()/addSeq() are
+        // called on the same tree map inside the loop and may erase from / append to
+        // tmap->namesOfSeqs (NOTE(review): their implementation is not visible here - confirm),
+        // which would make an index-based walk over the live container skip or revisit names.
+        vector<string> allNames = tmap->namesOfSeqs;
+
+        //remove seqs not in sample from treemap
+        for (size_t i = 0; i < allNames.size(); i++) {
+            if (m->control_pressed) { return newTree; }
+
+            const string& name = allNames[i];
+            bool inSample = std::find(subsampledSeqs.begin(), subsampledSeqs.end(), name) != subsampledSeqs.end();
+
+            //if you didnt find it, move it to the placeholder group
+            if (!inSample) {
+                tmap->removeSeq(name);
+                tmap->addSeq(name, "doNotIncludeMe");
+            }
+        }
+
+        //create new tree
+        newTree = new Tree(tmap);
+        newTree->getCopy(T, originalNameMap, subsampledSeqs);
+
+        return newTree;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "getSample-Tree");
+        exit(1);
+    }
+}
//**********************************************************************************************************************
Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
try {
m->errorOut(e, "SubSample", "getSample-Tree");
exit(1);
}
-}
+}
//**********************************************************************************************************************
//assumes whole maps dupName -> uniqueName
map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
vector<string> sample;
vector<string> Groups = tMap->getNamesOfGroups();
+ for (int i = 0; i < Groups.size(); i++) {
+
+ if (m->inUsersGroups(Groups[i], m->getGroups())) {
+ if (m->control_pressed) { break; }
+
+ vector<string> thisGroup; thisGroup.push_back(Groups[i]);
+ vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
+ int thisSize = thisGroupsSeqs.size();
+
+ if (thisSize >= size) {
+
+ random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
+
+ for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
+ }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+ }
+ }
+
+ return sample;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSample", "getSample-TreeMap");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
+ try {
+ vector<string> sample;
+
+ //vector<string> Groups = tMap->getNamesOfGroups();
for (int i = 0; i < Groups.size(); i++) {
if (m->control_pressed) { break; }
vector<string> thisGroup; thisGroup.push_back(Groups[i]);
vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
int thisSize = thisGroupsSeqs.size();
-
- if (thisSize >= size) {
-
- random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
- for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
- }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+ for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
}
return sample;