5 // Created by Sarah Westcott on 4/2/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //**********************************************************************************************************************
11 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map<string, string> originalNameMap) {
15 map<string, vector<string> > newGroups;
16 vector<string> subsampledSeqs = getSample(tmap, size, newGroups);
18 //remove seqs not in sample from treemap
19 for (map<string, vector<string> >::iterator it = newGroups.begin(); it != newGroups.end(); it++) {
20 for (int i = 0; i < (it->second).size(); i++) {
21 newTmap->addSeq((it->second)[i], it->first);
25 newTree = new Tree(newTmap);
26 newTree->getCopy(T, originalNameMap);
31 m->errorOut(e, "SubSample", "getSample-Tree");
35 /**********************************************************************************************************************
36 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
40 vector<string> subsampledSeqs = getSample(tmap, size);
41 map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs);
43 //remove seqs not in sample from treemap
44 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
45 //is that name in the subsample?
47 for (int j = 0; j < subsampledSeqs.size(); j++) {
48 if (tmap->namesOfSeqs[i] == subsampledSeqs[j]) { break; } //found it
52 if (m->control_pressed) { return newTree; }
54 //if you didnt find it, remove it
55 if (count == subsampledSeqs.size()) {
56 tmap->removeSeq(tmap->namesOfSeqs[i]);
57 i--; //need this because removeSeq removes name from namesOfSeqs
62 int numUniques = sampledNameMap.size();
63 if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
65 newTree = new Tree(numUniques, tmap); //numNodes, treemap
66 newTree->getSubTree(T, subsampledSeqs, sampledNameMap);
71 m->errorOut(e, "SubSample", "getSample-Tree");
75 //**********************************************************************************************************************
76 //assumes whole maps dupName -> uniqueName
77 map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
79 map<string, string> nameMap;
81 //whole will be empty if user gave no name file, so we don't need to make a new one
82 if (whole.size() == 0) { return nameMap; }
84 vector<string> newWanted;
85 for (int i = 0; i < wanted.size(); i++) {
87 if (m->control_pressed) { break; }
89 string dupName = wanted[i];
91 map<string, string>::iterator itWhole = whole.find(dupName);
92 if (itWhole != whole.end()) {
93 string repName = itWhole->second;
95 //do we already have this rep?
96 map<string, string>::iterator itName = nameMap.find(repName);
97 if (itName != nameMap.end()) { //add this seqs to dups list
98 (itName->second) += "," + dupName;
99 }else { //first sighting of this seq
100 nameMap[repName] = dupName;
101 newWanted.push_back(repName);
103 }else { m->mothurOut("[ERROR]: "+dupName+" is not in your name file, please correct.\n"); m->control_pressed = true; }
109 catch(exception& e) {
110 m->errorOut(e, "SubSample", "deconvolute");
114 //**********************************************************************************************************************
115 vector<string> SubSample::getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample) {
117 vector<string> temp2;
118 sample["doNotIncludeMe"] = temp2;
120 vector<string> namesInSample;
122 vector<string> Groups = tMap->getNamesOfGroups();
123 for (int i = 0; i < Groups.size(); i++) {
125 if (m->inUsersGroups(Groups[i], m->getGroups())) {
126 if (m->control_pressed) { break; }
128 vector<string> thisGroup; thisGroup.push_back(Groups[i]);
129 vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
130 int thisSize = thisGroupsSeqs.size();
132 sample[Groups[i]] = temp;
134 if (thisSize >= size) {
136 random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
138 for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); }
139 for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); }
141 }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
145 return namesInSample;
147 catch(exception& e) {
148 m->errorOut(e, "SubSample", "getSample-TreeMap");
153 //**********************************************************************************************************************
154 vector<string> SubSample::getSample(TreeMap* tMap, int size) {
156 vector<string> sample;
158 vector<string> Groups = tMap->getNamesOfGroups();
159 for (int i = 0; i < Groups.size(); i++) {
161 if (m->inUsersGroups(Groups[i], m->getGroups())) {
162 if (m->control_pressed) { break; }
164 vector<string> thisGroup; thisGroup.push_back(Groups[i]);
165 vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
166 int thisSize = thisGroupsSeqs.size();
168 if (thisSize >= size) {
170 random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
172 for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
173 }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
179 catch(exception& e) {
180 m->errorOut(e, "SubSample", "getSample-TreeMap");
184 //**********************************************************************************************************************
185 vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
187 vector<string> sample;
189 //vector<string> Groups = tMap->getNamesOfGroups();
190 for (int i = 0; i < Groups.size(); i++) {
192 if (m->control_pressed) { break; }
194 vector<string> thisGroup; thisGroup.push_back(Groups[i]);
195 vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
196 int thisSize = thisGroupsSeqs.size();
198 for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
203 catch(exception& e) {
204 m->errorOut(e, "SubSample", "getSample-TreeMap");
208 //**********************************************************************************************************************
209 vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
212 //save mothurOut's binLabels to restore for next label
213 vector<string> saveBinLabels = m->currentBinLabels;
215 int numBins = thislookup[0]->getNumBins();
216 for (int i = 0; i < thislookup.size(); i++) {
217 int thisSize = thislookup[i]->getNumSeqs();
219 if (thisSize != size) {
221 string thisgroup = thislookup[i]->getGroup();
224 for(int p=0;p<numBins;p++){
225 for(int j=0;j<thislookup[i]->getAbundance(p);j++){
229 random_shuffle(order.begin(), order.end());
231 SharedRAbundVector* temp = new SharedRAbundVector(numBins);
232 temp->setLabel(thislookup[i]->getLabel());
233 temp->setGroup(thislookup[i]->getGroup());
235 delete thislookup[i];
236 thislookup[i] = temp;
239 for (int j = 0; j < size; j++) {
241 if (m->control_pressed) { return m->currentBinLabels; }
243 int bin = order.get(j);
245 int abund = thislookup[i]->getAbundance(bin);
246 thislookup[i]->set(bin, (abund+1), thisgroup);
251 //subsampling may have created some otus with no sequences in them
252 eliminateZeroOTUS(thislookup);
254 if (m->control_pressed) { return m->currentBinLabels; }
256 //save mothurOut's binLabels to restore for next label
257 vector<string> subsampleBinLabels = m->currentBinLabels;
258 m->currentBinLabels = saveBinLabels;
260 return subsampleBinLabels;
263 catch(exception& e) {
264 m->errorOut(e, "SubSample", "getSample-shared");
268 //**********************************************************************************************************************
269 int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
272 vector<SharedRAbundVector*> newLookup;
273 for (int i = 0; i < thislookup.size(); i++) {
274 SharedRAbundVector* temp = new SharedRAbundVector();
275 temp->setLabel(thislookup[i]->getLabel());
276 temp->setGroup(thislookup[i]->getGroup());
277 newLookup.push_back(temp);
281 vector<string> newBinLabels;
282 string snumBins = toString(thislookup[0]->getNumBins());
283 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
284 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
286 //look at each sharedRabund and make sure they are not all zero
288 for (int j = 0; j < thislookup.size(); j++) {
289 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
292 //if they are not all zero add this bin
294 for (int j = 0; j < thislookup.size(); j++) {
295 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
297 //if there is a bin label use it otherwise make one
298 string binLabel = "Otu";
299 string sbinNumber = toString(i+1);
300 if (sbinNumber.length() < snumBins.length()) {
301 int diff = snumBins.length() - sbinNumber.length();
302 for (int h = 0; h < diff; h++) { binLabel += "0"; }
304 binLabel += sbinNumber;
305 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
307 newBinLabels.push_back(binLabel);
311 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
314 thislookup = newLookup;
315 m->currentBinLabels = newBinLabels;
320 catch(exception& e) {
321 m->errorOut(e, "SubSample", "eliminateZeroOTUS");
325 //**********************************************************************************************************************
326 int SubSample::getSample(SAbundVector*& sabund, int size) {
329 OrderVector* order = new OrderVector();
330 *order = sabund->getOrderVector(NULL);
332 int numBins = order->getNumBins();
333 int thisSize = order->getNumSeqs();
335 if (thisSize > size) {
336 random_shuffle(order->begin(), order->end());
338 RAbundVector* rabund = new RAbundVector(numBins);
339 rabund->setLabel(order->getLabel());
341 for (int j = 0; j < size; j++) {
343 if (m->control_pressed) { delete order; delete rabund; return 0; }
345 int bin = order->get(j);
347 int abund = rabund->get(bin);
348 rabund->set(bin, (abund+1));
352 sabund = new SAbundVector();
353 *sabund = rabund->getSAbundVector();
356 }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->control_pressed = true; }
358 if (m->control_pressed) { return 0; }
365 catch(exception& e) {
366 m->errorOut(e, "SubSampleCommand", "getSample");
370 //**********************************************************************************************************************