]> git.donarmstrong.com Git - mothur.git/blobdiff - subsample.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / subsample.cpp
index e6dd845adf9b64bec6c932f4cf72de6d8b9fe909..c55accd618991279bb3a1a0ff6d29e0d0250a81e 100644 (file)
@@ -7,7 +7,102 @@
 //
 
 #include "subsample.h"
+//**********************************************************************************************************************
+//Builds a subsampled version of tree T: every group that is in the user's groups keeps
+//exactly size randomly chosen sequences.
+//  T     - tree the returned tree is copied from
+//  ct    - count table the group and sequence abundances are read from
+//  newCt - overwritten with a copy of ct, then given the sampled abundances plus an extra
+//          group "doNotIncludeMe" holding, per sequence, the reads that were not sampled
+//  size  - number of sequences to keep in each group
+//Returns a Tree allocated with new (built from newCt, copied from T) - presumably owned by the caller.
+//A group with fewer than size sequences is reported and sets m->control_pressed.
+Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size) {
+    try {
+        //start from the full count table and add a bucket for everything we leave out
+        vector<string> groupNames = ct->getNamesOfGroups();
+        newCt->copy(ct);
+        newCt->addGroup("doNotIncludeMe");
+
+        //reads left out of the sample, tallied per sequence - every sequence starts at zero
+        map<string, int> leftOut;
+        vector<string> allSeqs = ct->getNamesOfSeqs();
+        for (vector<string>::iterator itSeq = allSeqs.begin(); itSeq != allSeqs.end(); ++itSeq) { leftOut[*itSeq] = 0; }
+
+        for (int g = 0; g < groupNames.size(); g++) {
+            //only groups the user asked for are subsampled
+            if (!m->inUsersGroups(groupNames[g], m->getGroups())) { continue; }
+            if (m->control_pressed) { break; }
+
+            int groupTotal = ct->getGroupCount(groupNames[g]);
+            if (groupTotal < size) {
+                m->mothurOut("[ERROR]: You have selected a size that is larger than "+groupNames[g]+" number of sequences.\n");
+                m->control_pressed = true;
+                continue;
+            }
+
+            //one pool entry per read, holding the index (into seqNames) of the sequence it belongs to
+            vector<string> seqNames = ct->getNamesOfSeqs(groupNames[g]);
+            vector<int> pool;
+            for (int s = 0; s < seqNames.size(); s++) {
+                int reads = ct->getGroupCount(seqNames[s], groupNames[g]);
+                for (int r = 0; r < reads; r++) { pool.push_back(s); }
+            }
+            random_shuffle(pool.begin(), pool.end());
+
+            //the first size entries of the shuffled pool are the sample
+            vector<int> tally(seqNames.size(), 0);
+            for (int p = 0; p < size; p++) { tally[pool[p]]++; }
+            for (int s = 0; s < tally.size(); s++) { newCt->setAbund(seqNames[s], groupNames[g], tally[s]); }
+
+            //everything after them goes to the doNotIncludeMe totals
+            tally.assign(seqNames.size(), 0);
+            for (int p = size; p < groupTotal; p++) { tally[pool[p]]++; }
+            for (int s = 0; s < tally.size(); s++) { leftOut[seqNames[s]] += tally[s]; }
 
+        }
+
+        map<string, int>::iterator itLeft = leftOut.begin();
+        while (itLeft != leftOut.end()) {
+            newCt->setAbund(itLeft->first, "doNotIncludeMe", itLeft->second);
+            itLeft++;
+        }
+
+        Tree* sampledTree = new Tree(newCt);
+        sampledTree->getCopy(T, true);
+        return sampledTree;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "getSample-Tree");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//Collapses the names in wanted onto their representative (unique) names.
+//  whole  - assumed to map dupName -> uniqueName; empty when the user gave no name file
+//  wanted - in: the names to look up in whole
+//           out: the representative names, in order of first sighting
+//Returns uniqueName -> comma separated list of the wanted names it stands for.
+//If whole is empty an empty map is returned and wanted is left untouched.
+//A wanted name that is missing from whole is reported and sets m->control_pressed.
+map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
+    try {
+        map<string, string> repToDups;
+
+        //no name file was given, so there is nothing to rebuild
+        if (whole.empty()) { return repToDups; }
+
+        vector<string> reps;
+        for (int i = 0; i < wanted.size(); i++) {
+
+            if (m->control_pressed) { break; }
+
+            string dup = wanted[i];
+
+            map<string, string>::iterator found = whole.find(dup);
+            if (found == whole.end()) {
+                m->mothurOut("[ERROR]: "+dup+" is not in your name file, please correct.\n");
+                m->control_pressed = true;
+                continue;
+            }
+
+            string rep = found->second;
+
+            map<string, string>::iterator known = repToDups.find(rep);
+            if (known == repToDups.end()) { //first sighting of this rep
+                repToDups[rep] = dup;
+                reps.push_back(rep);
+            }else { //rep already seen, extend its list of dups
+                known->second += "," + dup;
+            }
+        }
+
+        wanted = reps;
+        return repToDups;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SubSample", "deconvolute");
+        exit(1);
+    }
+}
 //**********************************************************************************************************************
 vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
        try {
@@ -53,7 +148,7 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
                
                //subsampling may have created some otus with no sequences in them
                eliminateZeroOTUS(thislookup);
-               
+        
                if (m->control_pressed) { return m->currentBinLabels; }
                
                //save mothurOut's binLabels to restore for next label
@@ -64,7 +159,7 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
                
        }
        catch(exception& e) {
-               m->errorOut(e, "SubSample", "getSample");
+               m->errorOut(e, "SubSample", "getSample-shared");
                exit(1);
        }
 }      
@@ -125,8 +220,51 @@ int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
                exit(1);
        }
 }
+//**********************************************************************************************************************
+//Subsamples the SAbundVector pointed to by sabund down to size sequences.
+//  sabund - in/out: when it holds more than size sequences the old object is deleted and
+//           the pointer is set to a newly allocated SAbundVector built from the sample;
+//           otherwise it is left untouched
+//  size   - number of sequences to keep
+//If size is larger than the number of sequences an error is reported and
+//m->control_pressed is set. Always returns 0; callers presumably check m->control_pressed.
+int SubSample::getSample(SAbundVector*& sabund, int size) {
+    try {
+
+        //order is indexed per sequence below (order->get(j) is used as a bin index) - presumably one entry per sequence
+        OrderVector* order = new OrderVector();
+        *order = sabund->getOrderVector(NULL);
+
+        int numBins = order->getNumBins();
+        int thisSize = order->getNumSeqs();
+
+        if (thisSize > size) {
+            random_shuffle(order->begin(), order->end());
+
+            RAbundVector* rabund = new RAbundVector(numBins);
+            rabund->setLabel(order->getLabel());
 
-
+            //the first size entries of the shuffled order are the sample: count them per bin
+            for (int j = 0; j < size; j++) {
+
+                if (m->control_pressed) { delete order; delete rabund; return 0; }
+
+                int bin = order->get(j);
+
+                int abund = rabund->get(bin);
+                rabund->set(bin, (abund+1));
+            }
+
+            //swap the caller's sabund for one built from the sampled abundances
+            delete sabund;
+            sabund = new SAbundVector();
+            *sabund = rabund->getSAbundVector();
+            delete rabund;
+
+        }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->control_pressed = true; }
+
+        //every remaining path returns 0, so release order unconditionally -
+        //this includes the path where control_pressed was just set by the error above
+        delete order;
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        //report under this class's name, matching the other SubSample methods
+        m->errorOut(e, "SubSample", "getSample");
+        exit(1);
+    }
+}
 //**********************************************************************************************************************