]> git.donarmstrong.com Git - mothur.git/blobdiff - subsample.cpp
fixed segfault in unifrac with subsample. in progress of implementing a version of...
[mothur.git] / subsample.cpp
index b1e78a44a0a2e5b5cd31e7c38e27710603ee1578..70c16a8a6668649e3a8ea697d6f32ec1a738dcc9 100644 (file)
@@ -7,7 +7,47 @@
 //
 
 #include "subsample.h"
-
+//**********************************************************************************************************************
+Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size, map<string, string> originalNameMap) { // returns a newly allocated Tree copied from T for a subsample of tmap's sequences
+    try { // any std::exception raised inside is reported through m->errorOut and the process exits
+        Tree* newTree = NULL; // stays NULL until the copy is built, so the early return below hands back NULL
+        // draw the sequence names to keep (getSample(tmap, size)), then deconvolute them against whole
+        vector<string> subsampledSeqs = getSample(tmap, size);
+        map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs); // only its size is read below
+        
+        //remove seqs not in sample from treemap (this modifies the caller's tmap in place)
+        for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { // NOTE(review): tmap is changed inside this loop while namesOfSeqs is indexed - confirm removeSeq/addSeq keep positions stable
+            //is that name in the subsample?
+            int count = 0; // number of sampled names compared without finding a match
+            string name = tmap->namesOfSeqs[i];
+            for (int j = 0; j < subsampledSeqs.size(); j++) {
+                if (name == subsampledSeqs[j]) { break; } //found it
+                count++;
+            }
+            
+            if (m->control_pressed) { return newTree; } // newTree is still NULL at this point
+            
+            //if you didn't find it (count ran through the whole sample), remove it and add it back with "doNotIncludeMe"
+            if (count == subsampledSeqs.size()) { 
+                tmap->removeSeq(name);
+                tmap->addSeq(name, "doNotIncludeMe"); // presumably the second argument is the group label - TODO confirm against TreeMap::addSeq
+            }
+        }
+        // NOTE(review): numUniques is computed next but is not read again anywhere in this function
+        //create new tree
+        int numUniques = sampledNameMap.size();
+        if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
+        // the Tree is built on the filtered tmap and returned as a raw pointer - presumably the caller deletes it; TODO confirm
+        newTree = new Tree(tmap);
+        newTree->getCopy(T, originalNameMap, subsampledSeqs);
+        
+        return newTree;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "getSample-Tree");
+        exit(1);
+    }
+}
 //**********************************************************************************************************************
 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
     try {
@@ -47,7 +87,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, in
         m->errorOut(e, "SubSample", "getSample-Tree");
         exit(1);
     }
-}      
+}
 //**********************************************************************************************************************
 //assumes whole maps dupName -> uniqueName
 map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
@@ -93,6 +133,37 @@ vector<string> SubSample::getSample(TreeMap* tMap, int size) {
         vector<string> sample;
         
         vector<string> Groups = tMap->getNamesOfGroups();    
+        for (int i = 0; i < Groups.size(); i++) {
+            
+            if (m->inUsersGroups(Groups[i], m->getGroups())) {
+                if (m->control_pressed) { break; }
+                
+                vector<string> thisGroup; thisGroup.push_back(Groups[i]);
+                vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
+                int thisSize = thisGroupsSeqs.size();
+                
+                if (thisSize >= size) {        
+                    
+                    random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
+                    
+                    for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
+                }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+            }
+        } 
+        
+        return sample;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SubSample", "getSample-TreeMap");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
+    try {
+        vector<string> sample;
+        
+        //vector<string> Groups = tMap->getNamesOfGroups();    
         for (int i = 0; i < Groups.size(); i++) {
             
             if (m->control_pressed) { break; }
@@ -100,13 +171,8 @@ vector<string> SubSample::getSample(TreeMap* tMap, int size) {
             vector<string> thisGroup; thisGroup.push_back(Groups[i]);
             vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
             int thisSize = thisGroupsSeqs.size();
-            
-            if (thisSize >= size) {    
-                
-                random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
                 
-                for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
-            }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+            for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
         } 
         
         return sample;