]> git.donarmstrong.com Git - mothur.git/blob - subsample.cpp
working on adding subsampling to dist.shared. fixed bug in phylotype command related...
[mothur.git] / subsample.cpp
1 //
2 //  subsample.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/2/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "subsample.h"
10
11 //**********************************************************************************************************************
12 vector<SharedRAbundVector*> SubSample::getSamplePreserve(vector<SharedRAbundVector*>& thislookup, vector<string>& newLabels, int size) {
13         try {
14                 
15         vector<SharedRAbundVector*> newlookup; newlookup.resize(thislookup.size(), NULL); 
16         
17                 //save mothurOut's binLabels to restore for next label
18                 vector<string> saveBinLabels = m->currentBinLabels;
19                 
20                 int numBins = thislookup[0]->getNumBins();
21                 for (int i = 0; i < thislookup.size(); i++) {           
22                         int thisSize = thislookup[i]->getNumSeqs();
23                         
24                         if (thisSize != size) {
25                                 
26                                 string thisgroup = thislookup[i]->getGroup();
27                                 
28                                 OrderVector order;
29                                 for(int p=0;p<numBins;p++){
30                                         for(int j=0;j<thislookup[i]->getAbundance(p);j++){
31                                                 order.push_back(p);
32                                         }
33                                 }
34                                 random_shuffle(order.begin(), order.end());
35                                 
36                                 SharedRAbundVector* temp = new SharedRAbundVector(numBins);
37                                 temp->setLabel(thislookup[i]->getLabel());
38                                 temp->setGroup(thislookup[i]->getGroup());
39                                 
40                                 newlookup[i] = temp;
41                                 
42                                 for (int j = 0; j < size; j++) {
43                                         
44                                         if (m->control_pressed) {  return newlookup; }
45                                         
46                                         int bin = order.get(j);
47                                         
48                                         int abund = newlookup[i]->getAbundance(bin);
49                                         newlookup[i]->set(bin, (abund+1), thisgroup);
50                                 }       
51                         }
52                 }
53                 
54                 //subsampling may have created some otus with no sequences in them
55                 eliminateZeroOTUS(newlookup);
56                 
57                 if (m->control_pressed) { return newlookup; }
58                 
59                 //save mothurOut's binLabels to restore for next label
60         newLabels = m->currentBinLabels;
61                 m->currentBinLabels = saveBinLabels;
62                 
63                 return newlookup;
64                 
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "SubSample", "getSamplePreserve");
68                 exit(1);
69         }
70 }       
71 //**********************************************************************************************************************
72 vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
73         try {
74                 
75                 //save mothurOut's binLabels to restore for next label
76                 vector<string> saveBinLabels = m->currentBinLabels;
77                 
78                 int numBins = thislookup[0]->getNumBins();
79                 for (int i = 0; i < thislookup.size(); i++) {           
80                         int thisSize = thislookup[i]->getNumSeqs();
81                         
82                         if (thisSize != size) {
83                                 
84                                 string thisgroup = thislookup[i]->getGroup();
85                                 
86                                 OrderVector order;
87                                 for(int p=0;p<numBins;p++){
88                                         for(int j=0;j<thislookup[i]->getAbundance(p);j++){
89                                                 order.push_back(p);
90                                         }
91                                 }
92                                 random_shuffle(order.begin(), order.end());
93                                 
94                                 SharedRAbundVector* temp = new SharedRAbundVector(numBins);
95                                 temp->setLabel(thislookup[i]->getLabel());
96                                 temp->setGroup(thislookup[i]->getGroup());
97                                 
98                                 delete thislookup[i];
99                                 thislookup[i] = temp;
100                                 
101                                 
102                                 for (int j = 0; j < size; j++) {
103                                         
104                                         if (m->control_pressed) {  return m->currentBinLabels; }
105                                         
106                                         int bin = order.get(j);
107                                         
108                                         int abund = thislookup[i]->getAbundance(bin);
109                                         thislookup[i]->set(bin, (abund+1), thisgroup);
110                                 }       
111                         }
112                 }
113                 
114                 //subsampling may have created some otus with no sequences in them
115                 eliminateZeroOTUS(thislookup);
116                 
117                 if (m->control_pressed) { return m->currentBinLabels; }
118                 
119                 //save mothurOut's binLabels to restore for next label
120         vector<string> subsampleBinLabels = m->currentBinLabels;
121                 m->currentBinLabels = saveBinLabels;
122                 
123                 return subsampleBinLabels;
124                 
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "SubSample", "getSample");
128                 exit(1);
129         }
130 }       
131 //**********************************************************************************************************************
132 int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
133         try {
134                 
135                 vector<SharedRAbundVector*> newLookup;
136                 for (int i = 0; i < thislookup.size(); i++) {
137                         SharedRAbundVector* temp = new SharedRAbundVector();
138                         temp->setLabel(thislookup[i]->getLabel());
139                         temp->setGroup(thislookup[i]->getGroup());
140                         newLookup.push_back(temp);
141                 }
142                 
143                 //for each bin
144                 vector<string> newBinLabels;
145                 string snumBins = toString(thislookup[0]->getNumBins());
146                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
147                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
148                         
149                         //look at each sharedRabund and make sure they are not all zero
150                         bool allZero = true;
151                         for (int j = 0; j < thislookup.size(); j++) {
152                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
153                         }
154                         
155                         //if they are not all zero add this bin
156                         if (!allZero) {
157                                 for (int j = 0; j < thislookup.size(); j++) {
158                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
159                                 }
160                                 //if there is a bin label use it otherwise make one
161                                 string binLabel = "Otu";
162                                 string sbinNumber = toString(i+1);
163                                 if (sbinNumber.length() < snumBins.length()) { 
164                                         int diff = snumBins.length() - sbinNumber.length();
165                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
166                                 }
167                                 binLabel += sbinNumber; 
168                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
169                                 
170                                 newBinLabels.push_back(binLabel);
171                         }
172                 }
173                 
174                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
175                 thislookup.clear();
176                 
177                 thislookup = newLookup;
178                 m->currentBinLabels = newBinLabels;
179                 
180                 return 0;
181                 
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "SubSample", "eliminateZeroOTUS");
185                 exit(1);
186         }
187 }
188
189
190 //**********************************************************************************************************************
191
192