]> git.donarmstrong.com Git - mothur.git/blob - rarefact.cpp
paralellized rarefaction.single
[mothur.git] / rarefact.cpp
1 /*
2  *  rarefact.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 11/18/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefact.h"
11 //#include "ordervector.hpp"
12
13 /***********************************************************************/
14
15 int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
16         try {
17                 RarefactionCurveData* rcd = new RarefactionCurveData();
18                 for(int i=0;i<displays.size();i++){
19                         rcd->registerDisplay(displays[i]);
20                 }
21                 
22                 //convert freq percentage to number
23                 int increment = 1;
24                 if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
25                 else { increment = percentFreq;  }      
26                 
27                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
28                                 if(processors == 1){
29                                         driver(rcd, increment, nIters); 
30                                 }else{
31                                         vector<int> procIters;
32                                         
33                                         int numItersPerProcessor = nIters / processors;
34                                         
35                                         //divide iters between processes
36                                         for (int i = 0; i < processors; i++) {
37                                                 if(i == processors - 1){
38                                                         numItersPerProcessor = nIters - i * numItersPerProcessor;
39                                                 }
40                                                 procIters.push_back(numItersPerProcessor);
41                                         }
42                                         
43                                         createProcesses(procIters, rcd, increment); 
44                                 }
45
46                 #else
47                         driver(rcd, increment, nIters); 
48                 #endif
49
50                 for(int i=0;i<displays.size();i++){
51                         displays[i]->close();
52                 }
53                 
54                 delete rcd;
55                 return 0;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "Rarefact", "getCurve");
59                 exit(1);
60         }
61 }
62 /***********************************************************************/
63 int Rarefact::driver(RarefactionCurveData* rcd, int increment, int nIters = 1000){
64         try {
65                         
66                 for(int iter=0;iter<nIters;iter++){
67                 
68                         for(int i=0;i<displays.size();i++){
69                                 displays[i]->init(label);
70                         }
71                 
72                         RAbundVector* lookup    = new RAbundVector(order->getNumBins());
73                         SAbundVector* rank      = new SAbundVector(order->getMaxRank()+1);
74                         random_shuffle(order->begin(), order->end());
75                 
76                         for(int i=0;i<numSeqs;i++){
77                         
78                                 if (m->control_pressed) { delete lookup; delete rank; delete rcd; return 0;  }
79                         
80                                 int binNumber = order->get(i);
81                                 int abundance = lookup->get(binNumber);
82                         
83                                 rank->set(abundance, rank->get(abundance)-1);
84                                 abundance++;
85                 
86                                 lookup->set(binNumber, abundance);
87                                 rank->set(abundance, rank->get(abundance)+1);
88
89                                 if((i == 0) || (i+1) % increment == 0){
90                                         rcd->updateRankData(rank);
91                                 }
92                         }
93         
94                         if(numSeqs % increment != 0){
95                                 rcd->updateRankData(rank);
96                         }
97
98                         for(int i=0;i<displays.size();i++){
99                                 displays[i]->reset();
100                         }
101                         
102                         delete lookup;
103                         delete rank;
104                 }
105
106                 return 0;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "Rarefact", "driver");
110                 exit(1);
111         }
112 }
113 /**************************************************************************************************/
114
115 int Rarefact::createProcesses(vector<int>& procIters, RarefactionCurveData* rcd, int increment) {
116         try {
117 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
118                 int process = 1;
119                 int num = 0;
120                 vector<int> processIDS;
121                 
122                 EstOutput results;
123                 
124                 //loop through and create all the processes you want
125                 while (process != processors) {
126                         int pid = fork();
127                         
128                         if (pid > 0) {
129                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
130                                 process++;
131                         }else if (pid == 0){
132                                 driver(rcd, increment, procIters[process]);
133                         
134                                 //pass numSeqs to parent
135                                 for(int i=0;i<displays.size();i++){
136                                         string tempFile = toString(getpid()) + toString(i) + ".rarefact.temp";
137                                         displays[i]->outputTempFiles(tempFile);
138                                 }
139                                 exit(0);
140                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
141                 }
142                 
143                 driver(rcd, increment, procIters[0]);
144                 
145                 //force parent to wait until all the processes are done
146                 for (int i=0;i<(processors-1);i++) { 
147                         int temp = processIDS[i];
148                         wait(&temp);
149                 }
150                 
151                 //get data created by processes
152                 for (int i=0;i<(processors-1);i++) { 
153                         for(int j=0;j<displays.size();j++){
154                                 string s = toString(processIDS[i]) + toString(j) + ".rarefact.temp";
155                                 displays[j]->inputTempFiles(s);
156                                 remove(s.c_str());
157                         }
158                 }
159                 
160                 return 0;
161 #endif          
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "Rarefact", "createProcesses");
165                 exit(1);
166         }
167 }
168 /***********************************************************************/
169
170 int Rarefact::getSharedCurve(float percentFreq = 0.01, int nIters = 1000){
171 try {
172                 SharedRarefactionCurveData* rcd = new SharedRarefactionCurveData();
173                 
174                 label = lookup[0]->getLabel();
175                 
176                 //register the displays
177                 for(int i=0;i<displays.size();i++){
178                         rcd->registerDisplay(displays[i]);
179                 }
180                 
181                 //if jumble is false all iters will be the same
182                 if (globaldata->jumble == false)  {  nIters = 1;  }
183                 
184                 //convert freq percentage to number
185                 int increment = 1;
186                 if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
187                 else { increment = percentFreq;  }
188                 
189                 for(int iter=0;iter<nIters;iter++){
190                 
191                         for(int i=0;i<displays.size();i++){
192                                 displays[i]->init(label);                 
193                         }
194                         
195                         if (globaldata->jumble == true)  {
196                                 //randomize the groups
197                                 random_shuffle(lookup.begin(), lookup.end());
198                         }
199                         
200                         //make merge the size of lookup[0]
201                         SharedRAbundVector* merge = new SharedRAbundVector(lookup[0]->size());
202                         
203                         //make copy of lookup zero
204                         for(int i = 0; i<lookup[0]->size(); i++) {
205                                 merge->set(i, lookup[0]->getAbundance(i), "merge");
206                         }
207                         
208                         vector<SharedRAbundVector*> subset;
209                         //send each group one at a time
210                         for (int k = 0; k < lookup.size(); k++) { 
211                                 if (m->control_pressed) {  delete merge; delete rcd; return 0;  }
212                                 
213                                 subset.clear(); //clears out old pair of sharedrabunds
214                                 //add in new pair of sharedrabunds
215                                 subset.push_back(merge); subset.push_back(lookup[k]);
216                                 
217                                 rcd->updateSharedData(subset, k+1, numGroupComb);
218                                 mergeVectors(merge, lookup[k]);
219                         }
220
221                         //resets output files
222                         for(int i=0;i<displays.size();i++){
223                                 displays[i]->reset();
224                         }
225                         
226                         delete merge;
227                 }
228                 
229                 for(int i=0;i<displays.size();i++){
230                         displays[i]->close();
231                 }
232                 
233                 delete rcd;
234                 return 0;
235         }
236         catch(exception& e) {
237                 m->errorOut(e, "Rarefact", "getSharedCurve");
238                 exit(1);
239         }
240 }
241
242 /**************************************************************************************/
243 void Rarefact::mergeVectors(SharedRAbundVector* shared1, SharedRAbundVector* shared2) {
244         try{
245                 for (int k = 0; k < shared1->size(); k++) {
246                         //merge new species into shared1
247                         shared1->set(k, (shared1->getAbundance(k) + shared2->getAbundance(k)), "combo");  //set to 'combo' since this vector now contains multiple groups
248                 }
249         }
250         catch(exception& e) {
251                 m->errorOut(e, "Rarefact", "mergeVectors");
252                 exit(1);
253         }
254 }
255