]> git.donarmstrong.com Git - mothur.git/blob - rarefact.cpp
changed random forest output filename
[mothur.git] / rarefact.cpp
1 /*
2  *  rarefact.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 11/18/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefact.h"
11 //#include "ordervector.hpp"
12
13 /***********************************************************************/
14
15 int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
16         try {
17                 RarefactionCurveData* rcd = new RarefactionCurveData();
18                 for(int i=0;i<displays.size();i++){
19                         rcd->registerDisplay(displays[i]);
20                 }
21                 
22                 //convert freq percentage to number
23                 int increment = 1;
24                 if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
25                 else { increment = percentFreq;  }      
26                 
27                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
28                                 if(processors == 1){
29                                         driver(rcd, increment, nIters); 
30                                 }else{
31                                         vector<int> procIters;
32                                         
33                                         int numItersPerProcessor = nIters / processors;
34                                         
35                                         //divide iters between processes
36                                         for (int i = 0; i < processors; i++) {
37                                                 if(i == processors - 1){
38                                                         numItersPerProcessor = nIters - i * numItersPerProcessor;
39                                                 }
40                                                 procIters.push_back(numItersPerProcessor);
41                                         }
42                                         
43                                         createProcesses(procIters, rcd, increment); 
44                                 }
45
46                 #else
47                         driver(rcd, increment, nIters); 
48                 #endif
49
50                 for(int i=0;i<displays.size();i++){
51                         displays[i]->close();
52                 }
53                 
54                 delete rcd;
55                 return 0;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "Rarefact", "getCurve");
59                 exit(1);
60         }
61 }
62 /***********************************************************************/
63 int Rarefact::driver(RarefactionCurveData* rcd, int increment, int nIters = 1000){
64         try {
65                         
66                 for(int iter=0;iter<nIters;iter++){
67                 
68                         for(int i=0;i<displays.size();i++){
69                                 displays[i]->init(label);
70                         }
71                 
72                         RAbundVector* lookup    = new RAbundVector(order->getNumBins());
73                         SAbundVector* rank      = new SAbundVector(order->getMaxRank()+1);
74                         random_shuffle(order->begin(), order->end());
75                 
76                         for(int i=0;i<numSeqs;i++){
77                         
78                                 if (m->control_pressed) { delete lookup; delete rank; delete rcd; return 0;  }
79                         
80                                 int binNumber = order->get(i);
81                                 int abundance = lookup->get(binNumber);
82                         
83                                 rank->set(abundance, rank->get(abundance)-1);
84                                 abundance++;
85                 
86                                 lookup->set(binNumber, abundance);
87                                 rank->set(abundance, rank->get(abundance)+1);
88
89                                 if((i == 0) || ((i+1) % increment == 0) || (ends.count(i+1) != 0)){
90                                         rcd->updateRankData(rank);
91                                 }
92                         }
93         
94                         if((numSeqs % increment != 0) || (ends.count(numSeqs) != 0)){
95                                 rcd->updateRankData(rank);
96                         }
97
98                         for(int i=0;i<displays.size();i++){
99                                 displays[i]->reset();
100                         }
101                         
102                         delete lookup;
103                         delete rank;
104                 }
105
106                 return 0;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "Rarefact", "driver");
110                 exit(1);
111         }
112 }
113 /**************************************************************************************************/
114
115 int Rarefact::createProcesses(vector<int>& procIters, RarefactionCurveData* rcd, int increment) {
116         try {
117 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
118                 int process = 1;
119                 
120                 vector<int> processIDS;
121                 
122                 EstOutput results;
123                 
124                 //loop through and create all the processes you want
125                 while (process != processors) {
126                         int pid = fork();
127                         
128                         if (pid > 0) {
129                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
130                                 process++;
131                         }else if (pid == 0){
132                                 driver(rcd, increment, procIters[process]);
133                         
134                                 //pass numSeqs to parent
135                                 for(int i=0;i<displays.size();i++){
136                                         string tempFile = toString(getpid()) + toString(i) + ".rarefact.temp";
137                                         displays[i]->outputTempFiles(tempFile);
138                                 }
139                                 exit(0);
140                         }else { 
141                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
142                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
143                                 exit(0);
144                         }
145                 }
146                 
147                 driver(rcd, increment, procIters[0]);
148                 
149                 //force parent to wait until all the processes are done
150                 for (int i=0;i<(processors-1);i++) { 
151                         int temp = processIDS[i];
152                         wait(&temp);
153                 }
154                 
155                 //get data created by processes
156                 for (int i=0;i<(processors-1);i++) { 
157                         for(int j=0;j<displays.size();j++){
158                                 string s = toString(processIDS[i]) + toString(j) + ".rarefact.temp";
159                                 displays[j]->inputTempFiles(s);
160                                 m->mothurRemove(s);
161                         }
162                 }
163                 
164                 return 0;
165 #endif          
166         }
167         catch(exception& e) {
168                 m->errorOut(e, "Rarefact", "createProcesses");
169                 exit(1);
170         }
171 }
172 /***********************************************************************/
173
174 int Rarefact::getSharedCurve(float percentFreq = 0.01, int nIters = 1000){
175 try {
176                 SharedRarefactionCurveData* rcd = new SharedRarefactionCurveData();
177                 
178                 label = lookup[0]->getLabel();
179                 
180                 //register the displays
181                 for(int i=0;i<displays.size();i++){
182                         rcd->registerDisplay(displays[i]);
183                 }
184                 
185                 //if jumble is false all iters will be the same
186                 if (m->jumble == false)  {  nIters = 1;  }
187                 
188                 //convert freq percentage to number
189                 int increment = 1;
190                 if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
191                 else { increment = percentFreq;  }
192                 
193                 for(int iter=0;iter<nIters;iter++){
194                 
195                         for(int i=0;i<displays.size();i++){
196                                 displays[i]->init(label);                 
197                         }
198                         
199                         if (m->jumble == true)  {
200                                 //randomize the groups
201                                 random_shuffle(lookup.begin(), lookup.end());
202                         }
203                         
204                         //make merge the size of lookup[0]
205                         SharedRAbundVector* merge = new SharedRAbundVector(lookup[0]->size());
206                         
207                         //make copy of lookup zero
208                         for(int i = 0; i<lookup[0]->size(); i++) {
209                                 merge->set(i, lookup[0]->getAbundance(i), "merge");
210                         }
211                         
212                         vector<SharedRAbundVector*> subset;
213                         //send each group one at a time
214                         for (int k = 0; k < lookup.size(); k++) { 
215                                 if (m->control_pressed) {  delete merge; delete rcd; return 0;  }
216                                 
217                                 subset.clear(); //clears out old pair of sharedrabunds
218                                 //add in new pair of sharedrabunds
219                                 subset.push_back(merge); subset.push_back(lookup[k]);
220                                 
221                                 rcd->updateSharedData(subset, k+1, numGroupComb);
222                                 mergeVectors(merge, lookup[k]);
223                         }
224
225                         //resets output files
226                         for(int i=0;i<displays.size();i++){
227                                 displays[i]->reset();
228                         }
229                         
230                         delete merge;
231                 }
232                 
233                 for(int i=0;i<displays.size();i++){
234                         displays[i]->close();
235                 }
236                 
237                 delete rcd;
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "Rarefact", "getSharedCurve");
242                 exit(1);
243         }
244 }
245
246 /**************************************************************************************/
247 void Rarefact::mergeVectors(SharedRAbundVector* shared1, SharedRAbundVector* shared2) {
248         try{
249                 for (int k = 0; k < shared1->size(); k++) {
250                         //merge new species into shared1
251                         shared1->set(k, (shared1->getAbundance(k) + shared2->getAbundance(k)), "combo");  //set to 'combo' since this vector now contains multiple groups
252                 }
253         }
254         catch(exception& e) {
255                 m->errorOut(e, "Rarefact", "mergeVectors");
256                 exit(1);
257         }
258 }
259