5 // Created by Sarah Westcott on 4/2/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
10 //**********************************************************************************************************************
11 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size, map<string, string> originalNameMap) {
15 vector<string> subsampledSeqs = getSample(tmap, size);
16 map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs);
18 //remove seqs not in sample from treemap
19 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
20 //is that name in the subsample?
22 string name = tmap->namesOfSeqs[i];
23 for (int j = 0; j < subsampledSeqs.size(); j++) {
24 if (name == subsampledSeqs[j]) { break; } //found it
28 if (m->control_pressed) { return newTree; }
30 //if you didnt find it, remove it
31 if (count == subsampledSeqs.size()) {
32 tmap->removeSeq(name);
33 tmap->addSeq(name, "doNotIncludeMe");
38 int numUniques = sampledNameMap.size();
39 if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
41 newTree = new Tree(tmap);
42 newTree->getCopy(T, originalNameMap, subsampledSeqs);
47 m->errorOut(e, "SubSample", "getSample-Tree");
51 //**********************************************************************************************************************
52 Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
56 vector<string> subsampledSeqs = getSample(tmap, size);
57 map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs);
59 //remove seqs not in sample from treemap
60 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
61 //is that name in the subsample?
63 for (int j = 0; j < subsampledSeqs.size(); j++) {
64 if (tmap->namesOfSeqs[i] == subsampledSeqs[j]) { break; } //found it
68 if (m->control_pressed) { return newTree; }
70 //if you didnt find it, remove it
71 if (count == subsampledSeqs.size()) {
72 tmap->removeSeq(tmap->namesOfSeqs[i]);
73 i--; //need this because removeSeq removes name from namesOfSeqs
78 int numUniques = sampledNameMap.size();
79 if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
81 newTree = new Tree(numUniques, tmap); //numNodes, treemap
82 newTree->getSubTree(T, subsampledSeqs, sampledNameMap);
87 m->errorOut(e, "SubSample", "getSample-Tree");
91 //**********************************************************************************************************************
92 //assumes whole maps dupName -> uniqueName
93 map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
95 map<string, string> nameMap;
97 //whole will be empty if user gave no name file, so we don't need to make a new one
98 if (whole.size() == 0) { return nameMap; }
100 vector<string> newWanted;
101 for (int i = 0; i < wanted.size(); i++) {
103 if (m->control_pressed) { break; }
105 string dupName = wanted[i];
107 map<string, string>::iterator itWhole = whole.find(dupName);
108 if (itWhole != whole.end()) {
109 string repName = itWhole->second;
111 //do we already have this rep?
112 map<string, string>::iterator itName = nameMap.find(repName);
113 if (itName != nameMap.end()) { //add this seqs to dups list
114 (itName->second) += "," + dupName;
115 }else { //first sighting of this seq
116 nameMap[repName] = dupName;
117 newWanted.push_back(repName);
119 }else { m->mothurOut("[ERROR]: "+dupName+" is not in your name file, please correct.\n"); m->control_pressed = true; }
125 catch(exception& e) {
126 m->errorOut(e, "SubSample", "deconvolute");
130 //**********************************************************************************************************************
131 vector<string> SubSample::getSample(TreeMap* tMap, int size) {
133 vector<string> sample;
135 vector<string> Groups = tMap->getNamesOfGroups();
136 for (int i = 0; i < Groups.size(); i++) {
138 if (m->inUsersGroups(Groups[i], m->getGroups())) {
139 if (m->control_pressed) { break; }
141 vector<string> thisGroup; thisGroup.push_back(Groups[i]);
142 vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
143 int thisSize = thisGroupsSeqs.size();
145 if (thisSize >= size) {
147 random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
149 for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
150 }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
156 catch(exception& e) {
157 m->errorOut(e, "SubSample", "getSample-TreeMap");
161 //**********************************************************************************************************************
162 vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
164 vector<string> sample;
166 //vector<string> Groups = tMap->getNamesOfGroups();
167 for (int i = 0; i < Groups.size(); i++) {
169 if (m->control_pressed) { break; }
171 vector<string> thisGroup; thisGroup.push_back(Groups[i]);
172 vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
173 int thisSize = thisGroupsSeqs.size();
175 for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
180 catch(exception& e) {
181 m->errorOut(e, "SubSample", "getSample-TreeMap");
185 //**********************************************************************************************************************
186 vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
189 //save mothurOut's binLabels to restore for next label
190 vector<string> saveBinLabels = m->currentBinLabels;
192 int numBins = thislookup[0]->getNumBins();
193 for (int i = 0; i < thislookup.size(); i++) {
194 int thisSize = thislookup[i]->getNumSeqs();
196 if (thisSize != size) {
198 string thisgroup = thislookup[i]->getGroup();
201 for(int p=0;p<numBins;p++){
202 for(int j=0;j<thislookup[i]->getAbundance(p);j++){
206 random_shuffle(order.begin(), order.end());
208 SharedRAbundVector* temp = new SharedRAbundVector(numBins);
209 temp->setLabel(thislookup[i]->getLabel());
210 temp->setGroup(thislookup[i]->getGroup());
212 delete thislookup[i];
213 thislookup[i] = temp;
216 for (int j = 0; j < size; j++) {
218 if (m->control_pressed) { return m->currentBinLabels; }
220 int bin = order.get(j);
222 int abund = thislookup[i]->getAbundance(bin);
223 thislookup[i]->set(bin, (abund+1), thisgroup);
228 //subsampling may have created some otus with no sequences in them
229 eliminateZeroOTUS(thislookup);
231 if (m->control_pressed) { return m->currentBinLabels; }
233 //save mothurOut's binLabels to restore for next label
234 vector<string> subsampleBinLabels = m->currentBinLabels;
235 m->currentBinLabels = saveBinLabels;
237 return subsampleBinLabels;
240 catch(exception& e) {
241 m->errorOut(e, "SubSample", "getSample-shared");
245 //**********************************************************************************************************************
246 int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
249 vector<SharedRAbundVector*> newLookup;
250 for (int i = 0; i < thislookup.size(); i++) {
251 SharedRAbundVector* temp = new SharedRAbundVector();
252 temp->setLabel(thislookup[i]->getLabel());
253 temp->setGroup(thislookup[i]->getGroup());
254 newLookup.push_back(temp);
258 vector<string> newBinLabels;
259 string snumBins = toString(thislookup[0]->getNumBins());
260 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
261 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
263 //look at each sharedRabund and make sure they are not all zero
265 for (int j = 0; j < thislookup.size(); j++) {
266 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
269 //if they are not all zero add this bin
271 for (int j = 0; j < thislookup.size(); j++) {
272 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
274 //if there is a bin label use it otherwise make one
275 string binLabel = "Otu";
276 string sbinNumber = toString(i+1);
277 if (sbinNumber.length() < snumBins.length()) {
278 int diff = snumBins.length() - sbinNumber.length();
279 for (int h = 0; h < diff; h++) { binLabel += "0"; }
281 binLabel += sbinNumber;
282 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
284 newBinLabels.push_back(binLabel);
288 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
291 thislookup = newLookup;
292 m->currentBinLabels = newBinLabels;
297 catch(exception& e) {
298 m->errorOut(e, "SubSample", "eliminateZeroOTUS");
304 //**********************************************************************************************************************