git.donarmstrong.com Git - mothur.git/blobdiff - subsample.cpp
fixed bug with dist.shared subsampling. added mode parameter to dist.shared so...
[mothur.git] / subsample.cpp
index b1e78a44a0a2e5b5cd31e7c38e27710603ee1578..261297df67cfc1c5a8933a72f8bba307defbef88 100644 (file)
@@ -7,8 +7,32 @@
 //
 
 #include "subsample.h"
-
 //**********************************************************************************************************************
+// Creates a new Tree for a subsample of the sequences in tmap.
+//
+// T               - source tree, handed to Tree::getCopy on the new tree.
+// tmap            - tree map the subsample is drawn from (size seqs per selected group).
+// newTmap         - tree map that receives the grouped sequences and backs the new tree.
+// size            - number of sequences to draw from each group.
+// originalNameMap - forwarded unchanged to Tree::getCopy.
+//
+// Returns the newly allocated tree. NOTE(review): nothing here deletes it, so the caller
+// presumably owns it - confirm against the call sites.
+Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map<string, string> originalNameMap) {
+    try {
+        // group name -> sequence names, filled in by the TreeMap overload; besides the
+        // sampled groups it also holds a "doNotIncludeMe" entry with the unsampled seqs.
+        // Only the map is needed here, the returned name list is discarded.
+        map<string, vector<string> > groupedSeqs;
+        getSample(tmap, size, groupedSeqs);
+
+        // register every listed sequence with the new tree map under its group name
+        map<string, vector<string> >::iterator groupIt = groupedSeqs.begin();
+        while (groupIt != groupedSeqs.end()) {
+            vector<string>& members = groupIt->second;
+            for (vector<string>::iterator seqIt = members.begin(); seqIt != members.end(); ++seqIt) {
+                newTmap->addSeq(*seqIt, groupIt->first);
+            }
+            ++groupIt;
+        }
+
+        Tree* subsampledTree = new Tree(newTmap);
+        subsampledTree->getCopy(T, originalNameMap);
+
+        return subsampledTree;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "getSample-Tree");
+        exit(1);
+    }
+}
+/**********************************************************************************************************************
 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
     try {
         Tree* newTree = NULL;
@@ -47,7 +71,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, in
         m->errorOut(e, "SubSample", "getSample-Tree");
         exit(1);
     }
-}      
+}*/
 //**********************************************************************************************************************
 //assumes whole maps dupName -> uniqueName
 map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
@@ -86,13 +110,83 @@ map<string, string> SubSample::deconvolute(map<string, string> whole, vector<str
                m->errorOut(e, "SubSample", "deconvolute");
                exit(1);
        }
+}
+//**********************************************************************************************************************
+// Draws `size` randomly chosen sequences from each group of tMap that passes
+// m->inUsersGroups(group, m->getGroups()).
+//
+// tMap   - tree map supplying the group names and each group's sequence names.
+// size   - number of sequences to keep per group.
+// sample - output: each processed group name maps to the sequences kept for it, and the
+//          key "doNotIncludeMe" maps to the sequences that were left out.
+//
+// Returns the names of all kept sequences. A group holding fewer than `size` sequences
+// logs an error and sets m->control_pressed, which stops the loop at the next selected group.
+vector<string> SubSample::getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample) {
+    try {
+        const string excludedKey = "doNotIncludeMe";
+        sample[excludedKey] = vector<string>();
+
+        vector<string> chosenNames;
+        vector<string> groupNames = tMap->getNamesOfGroups();
+
+        for (int g = 0; g < groupNames.size(); g++) {
+            string& groupName = groupNames[g];
+
+            // groups outside the user's selection are skipped entirely
+            if (!m->inUsersGroups(groupName, m->getGroups())) { continue; }
+            if (m->control_pressed) { break; }
+
+            vector<string> onlyThisGroup(1, groupName);
+            vector<string> seqs = tMap->getNamesSeqs(onlyThisGroup);
+            int groupSize = seqs.size();
+
+            // start the group's entry from empty, even if the caller passed one in
+            sample[groupName] = vector<string>();
+
+            if (groupSize < size) {
+                m->mothurOut("[ERROR]: You have selected a size that is larger than "+groupName+" number of sequences.\n");
+                m->control_pressed = true;
+                continue;
+            }
+
+            random_shuffle(seqs.begin(), seqs.end());
+
+            // the first `size` shuffled names are kept, the remainder is set aside
+            vector<string>::iterator cut = seqs.begin() + size;
+            vector<string>& kept = sample[groupName];
+            kept.insert(kept.end(), seqs.begin(), cut);
+            chosenNames.insert(chosenNames.end(), seqs.begin(), cut);
+
+            vector<string>& excluded = sample[excludedKey];
+            excluded.insert(excluded.end(), cut, seqs.end());
+        }
+
+        return chosenNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "getSample-TreeMap");
+        exit(1);
+    }
 }      
+
 //**********************************************************************************************************************
+// Returns the names of `size` randomly chosen sequences from each group of tMap that
+// passes m->inUsersGroups(group, m->getGroups()).
+//
+// tMap - tree map supplying the group names and each group's sequence names.
+// size - number of sequences to take from each selected group.
+//
+// A group holding fewer than `size` sequences contributes nothing: an error is logged
+// and m->control_pressed is set, which stops the loop at the next selected group.
 vector<string> SubSample::getSample(TreeMap* tMap, int size) {
     try {
         vector<string> sample;
         
         vector<string> Groups = tMap->getNamesOfGroups();    
+        for (int i = 0; i < Groups.size(); i++) {
+            
+            //only groups the user selected are sampled
+            if (m->inUsersGroups(Groups[i], m->getGroups())) {
+                if (m->control_pressed) { break; }
+                
+                //getNamesSeqs takes a list of groups, so wrap the single group name
+                vector<string> thisGroup; thisGroup.push_back(Groups[i]);
+                vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
+                int thisSize = thisGroupsSeqs.size();
+                
+                if (thisSize >= size) {        
+                    
+                    //shuffle, then keep the first `size` names
+                    random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
+                    
+                    for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
+                }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+            }
+        } 
+        
+        return sample;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SubSample", "getSample-TreeMap");
+               exit(1);
+       }
+}      
+//**********************************************************************************************************************
+vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
+    try {
+        vector<string> sample;
+        
+        //vector<string> Groups = tMap->getNamesOfGroups();    
         for (int i = 0; i < Groups.size(); i++) {
             
             if (m->control_pressed) { break; }
@@ -100,13 +194,8 @@ vector<string> SubSample::getSample(TreeMap* tMap, int size) {
             vector<string> thisGroup; thisGroup.push_back(Groups[i]);
             vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
             int thisSize = thisGroupsSeqs.size();
-            
-            if (thisSize >= size) {    
                 
-                random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
-                
-                for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
-            }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
+            for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
         } 
         
         return sample;
@@ -161,7 +250,7 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
                
                //subsampling may have created some otus with no sequences in them
                eliminateZeroOTUS(thislookup);
-               
+        
                if (m->control_pressed) { return m->currentBinLabels; }
                
                //save mothurOut's binLabels to restore for next label
@@ -233,8 +322,51 @@ int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
                exit(1);
        }
 }
+//**********************************************************************************************************************
+// Subsamples the abundance data behind sabund down to `size` sequences.
+//
+// sabund - pointer, passed by reference, to the vector to subsample. When it holds more
+//          than `size` sequences the old object is deleted and the pointer is re-seated
+//          on a newly allocated SAbundVector built from the subsample.
+// size   - number of sequences to keep.
+//
+// Returns 0 on every path. When sabund holds exactly `size` sequences it is left
+// untouched; when it holds fewer, an error is logged and m->control_pressed is set.
+int SubSample::getSample(SAbundVector*& sabund, int size) {
+    try {
+        OrderVector* order = new OrderVector();
+        *order = sabund->getOrderVector(NULL);
+
+        int numBins = order->getNumBins();
+        int thisSize = order->getNumSeqs();
+
+        if (thisSize > size) {
+            random_shuffle(order->begin(), order->end());
+
+            RAbundVector* rabund = new RAbundVector(numBins);
+            rabund->setLabel(order->getLabel());

-
+            // count, per bin, the first `size` entries of the shuffled order vector
+            for (int j = 0; j < size; j++) {
+
+                if (m->control_pressed) { delete order; delete rabund; return 0; }
+
+                int bin = order->get(j);
+                rabund->set(bin, rabund->get(bin) + 1);
+            }
+
+            // swap the caller's vector for one built from the subsampled counts
+            delete sabund;
+            sabund = new SAbundVector();
+            *sabund = rabund->getSAbundVector();
+            delete rabund;
+
+        }else if (thisSize < size) {
+            m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n");
+            m->control_pressed = true;
+        }
+
+        // Free the order vector on every remaining path. Previously an early return taken
+        // when m->control_pressed was set (which the error branch above always does)
+        // skipped this delete and leaked the vector.
+        delete order;
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSampleCommand", "getSample");
+        exit(1);
+    }
+}
 //**********************************************************************************************************************